LCOV - code coverage report
Current view: top level - extensions/universalchardet/src/base - nsCharSetProber.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 41 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 2 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "nsCharSetProber.h"
       7             : 
       8             : //This filter applies to all scripts which do not use English characters
       9           0 : bool nsCharSetProber::FilterWithoutEnglishLetters(const char* aBuf, uint32_t aLen, char** newBuf, uint32_t& newLen)
      10             : {
      11             :   char *newptr;
      12             :   char *prevPtr, *curPtr;
      13             : 
      14           0 :   bool meetMSB = false;
      15           0 :   newptr = *newBuf = (char*)malloc(aLen);
      16           0 :   if (!newptr)
      17           0 :     return false;
      18             : 
      19           0 :   for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
      20             :   {
      21           0 :     if (*curPtr & 0x80)
      22             :     {
      23           0 :       meetMSB = true;
      24             :     }
      25           0 :     else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z')
      26             :     {
      27             :       //current char is a symbol, most likely a punctuation. we treat it as segment delimiter
      28           0 :       if (meetMSB && curPtr > prevPtr)
      29             :       //this segment contains more than single symbol, and it has upper ASCII, we need to keep it
      30             :       {
      31           0 :         while (prevPtr < curPtr) *newptr++ = *prevPtr++;
      32           0 :         prevPtr++;
      33           0 :         *newptr++ = ' ';
      34           0 :         meetMSB = false;
      35             :       }
      36             :       else //ignore current segment. (either because it is just a symbol or just an English word)
      37           0 :         prevPtr = curPtr+1;
      38             :     }
      39             :   }
      40           0 :   if (meetMSB && curPtr > prevPtr)
      41           0 :     while (prevPtr < curPtr) *newptr++ = *prevPtr++;
      42             : 
      43           0 :   newLen = newptr - *newBuf;
      44             : 
      45           0 :   return true;
      46             : }
      47             : 
      48             : //This filter applies to all scripts which contain both English characters and upper ASCII characters.
      49           0 : bool nsCharSetProber::FilterWithEnglishLetters(const char* aBuf, uint32_t aLen, char** newBuf, uint32_t& newLen)
      50             : {
      51             :   //do filtering to reduce load to probers
      52             :   char *newptr;
      53             :   char *prevPtr, *curPtr;
      54           0 :   bool isInTag = false;
      55             : 
      56           0 :   newptr = *newBuf = (char*)malloc(aLen);
      57           0 :   if (!newptr)
      58           0 :     return false;
      59             : 
      60           0 :   for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
      61             :   {
      62           0 :     if (*curPtr == '>')
      63           0 :       isInTag = false;
      64           0 :     else if (*curPtr == '<')
      65           0 :       isInTag = true;
      66             : 
      67           0 :     if (!(*curPtr & 0x80) &&
      68           0 :         (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
      69             :     {
      70           0 :       if (curPtr > prevPtr && !isInTag) // Current segment contains more than just a symbol
      71             :                                         // and it is not inside a tag, keep it.
      72             :       {
      73           0 :         while (prevPtr < curPtr) *newptr++ = *prevPtr++;
      74           0 :         prevPtr++;
      75           0 :         *newptr++ = ' ';
      76             :       }
      77             :       else
      78           0 :         prevPtr = curPtr+1;
      79             :     }
      80             :   }
      81             : 
      82             :   // If the current segment contains more than just a symbol
      83             :   // and it is not inside a tag then keep it.
      84           0 :   if (!isInTag)
      85           0 :     while (prevPtr < curPtr)
      86           0 :       *newptr++ = *prevPtr++;
      87             : 
      88           0 :   newLen = newptr - *newBuf;
      89             : 
      90           0 :   return true;
      91             : }

Generated by: LCOV version 1.13