LCOV - code coverage report
Current view: top level - extensions/universalchardet/src/base - nsLatin1Prober.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 37 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 3 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "nsLatin1Prober.h"
       7             : #include <stdio.h>
       8             : 
       9             : #define UDF    0        // undefined: every transition to or from it is illegal (0) in Latin1ClassModel
      10             : #define OTH    1        // other: bytes not classed as letters (controls, digits, punctuation, symbols)
      11             : #define ASC    2        // ascii capital letter (0x41 - 0x5A)
      12             : #define ASS    3        // ascii small letter (0x61 - 0x7A)
      13             : #define ACV    4        // accent capital vowel
      14             : #define ACO    5        // accent capital other
      15             : #define ASV    6        // accent small vowel
      16             : #define ASO    7        // accent small other
      17             : #define CLASS_NUM   8    // total classes; also the row width of the flattened Latin1ClassModel
      18             : 
      19             : static const unsigned char Latin1_CharToClass[] =
      20             : {
                         // Maps a byte value (used directly as the index) to one of the
                         // character classes defined above; 8 entries per row, 32 rows,
                         // so every value of an unsigned char has an entry.
                         // NOTE(review): the letter/UDF assignments in rows 80 - 9F look like
                         // windows-1252 rather than ISO-8859-1 -- confirm against the charset
                         // name this prober reports.
      21             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 00 - 07
      22             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 08 - 0F
      23             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 10 - 17
      24             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 18 - 1F
      25             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 20 - 27
      26             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 28 - 2F
      27             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 30 - 37
      28             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 38 - 3F
      29             :   OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   // 40 - 47
      30             :   ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   // 48 - 4F
      31             :   ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   // 50 - 57
      32             :   ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   // 58 - 5F
      33             :   OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   // 60 - 67
      34             :   ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   // 68 - 6F
      35             :   ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   // 70 - 77
      36             :   ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   // 78 - 7F
      37             :   OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   // 80 - 87
      38             :   OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   // 88 - 8F
      39             :   UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 90 - 97
      40             :   OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   // 98 - 9F
      41             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // A0 - A7
      42             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // A8 - AF
      43             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // B0 - B7
      44             :   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // B8 - BF
      45             :   ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   // C0 - C7
      46             :   ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   // C8 - CF
      47             :   ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   // D0 - D7
      48             :   ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   // D8 - DF
      49             :   ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   // E0 - E7
      50             :   ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   // E8 - EF
      51             :   ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   // F0 - F7
      52             :   ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   // F8 - FF
      53             : };
      54             : 
      55             : 
      56             : /* 0 : illegal
      57             :    1 : very unlikely
      58             :    2 : normal
      59             :    3 : very likely
      60             : */
      61             : static const unsigned char Latin1ClassModel[] =
      62             : {
                         // CLASS_NUM x CLASS_NUM matrix stored row by row. HandleData reads it
                         // as [previous class * CLASS_NUM + current class], so the row label
                         // on the left is the class of the preceding byte and the column
                         // header is the class of the byte being examined. Each cell is a
                         // likelihood category: 0 illegal, 1 very unlikely, 2 normal,
                         // 3 very likely.
      63             : /*      UDF OTH ASC ASS ACV ACO ASV ASO  */
      64             : /*UDF*/  0,  0,  0,  0,  0,  0,  0,  0,
      65             : /*OTH*/  0,  3,  3,  3,  3,  3,  3,  3,
      66             : /*ASC*/  0,  3,  3,  3,  3,  3,  3,  3,
      67             : /*ASS*/  0,  3,  3,  3,  1,  1,  3,  3,
      68             : /*ACV*/  0,  3,  3,  3,  1,  2,  1,  2,
      69             : /*ACO*/  0,  3,  3,  3,  3,  3,  3,  3,
      70             : /*ASV*/  0,  3,  1,  3,  1,  1,  1,  3,
      71             : /*ASO*/  0,  3,  1,  3,  1,  1,  3,  3,
      72             : };
      73             : 
      74           0 : void  nsLatin1Prober::Reset(void)
      75             : {
                         // Returns the prober to its initial state: detection in progress,
                         // previous character class OTH, and every likelihood counter zeroed.
      76           0 :   mState = eDetecting;
                         // Start as if the preceding byte were a non-letter, so the first
                         // byte of the next buffer is scored against the OTH row of the model.
      77           0 :   mLastCharClass = OTH;
      78           0 :   for (int i = 0; i < FREQ_CAT_NUM; i++)
      79           0 :     mFreqCounter[i] = 0;
      80           0 : }
      81             : 
      82             : 
      83           0 : nsProbingState nsLatin1Prober::HandleData(const char* aBuf, uint32_t aLen)
      84             : {
                         // Feeds aLen bytes starting at aBuf to the prober and returns the
                         // resulting mState.
                         //
                         // Each byte is mapped to a character class; the (previous class,
                         // current class) pair is looked up in Latin1ClassModel and the
                         // resulting likelihood category is tallied in mFreqCounter. A
                         // category of 0 (illegal) sets mState to eNotMe and stops the scan.
      85           0 :   char *newBuf1 = 0;
      86           0 :   uint32_t newLen1 = 0;
      87             : 
                         // When the filter returns false, scan the caller's buffer as-is.
                         // The cast only drops const; the loop below never writes through it.
      88           0 :   if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) {
      89           0 :     newBuf1 = (char*)aBuf;
      90           0 :     newLen1 = aLen;
      91             :   }
      92             : 
      93             :   unsigned char charClass;
      94             :   unsigned char freq;
      95           0 :   for (uint32_t i = 0; i < newLen1; i++)
      96             :   {
                           // Cast to unsigned char so bytes >= 0x80 index the upper half of
                           // the table instead of producing a negative index.
      97           0 :     charClass = Latin1_CharToClass[(unsigned char)newBuf1[i]];
      98           0 :     freq = Latin1ClassModel[mLastCharClass*CLASS_NUM + charClass];
      99           0 :     if (freq == 0) {
                             // Illegal transition: rule this charset out and stop scanning.
     100           0 :       mState = eNotMe;
     101           0 :       break;
     102             :     }
     103           0 :     mFreqCounter[freq]++;
     104           0 :     mLastCharClass = charClass;
     105             :   }
     106             : 
                         // newBuf1 differs from aBuf only when the filter supplied its own
                         // buffer. Presumably that buffer is malloc'ed by
                         // FilterWithEnglishLetters and owned by this function -- confirm.
     107           0 :   if (newBuf1 != aBuf)
     108           0 :     free(newBuf1);
     109             : 
     110           0 :   return mState;
     111             : }
     112             : 
     113           0 : float nsLatin1Prober::GetConfidence(void)
     114             : {
                         // Returns the current confidence score. Once the prober has been
                         // ruled out (eNotMe) the result is a fixed 0.01. Otherwise it is the
                         // share of category-3 ("very likely") transitions minus 20 times the
                         // share of category-1 ("very unlikely") transitions, clamped at 0
                         // and then halved, so the result never exceeds 0.5.
     115           0 :   if (mState == eNotMe)
     116           0 :     return 0.01f;
     117             : 
     118             :   float confidence;
     119           0 :   uint32_t total = 0;
     120           0 :   for (int32_t i = 0; i < FREQ_CAT_NUM; i++)
     121           0 :     total += mFreqCounter[i];
     122             : 
                         // No transitions counted yet: report 0 rather than dividing by zero.
     123           0 :   if(!total)
     124           0 :     confidence = 0.0f;
     125             :   else
     126             :   {
     127           0 :     confidence = mFreqCounter[3]*1.0f / total;
                           // Each "very unlikely" transition costs 20 times what a
                           // "very likely" one earns.
     128           0 :     confidence -= mFreqCounter[1]*20.0f/total;
     129             :   }
     130             : 
     131           0 :   if (confidence < 0.0f)
     132           0 :     confidence = 0.0f;
     133             : 
     134             :   // Lower the confidence of latin1 so that other, more accurate detectors
     135             :   // can take priority.
     136           0 :   confidence *= 0.50f;
     137             : 
     138           0 :   return confidence;
     139             : }
     140             : 
     141             : #ifdef DEBUG_chardet
     142             : void  nsLatin1Prober::DumpStatus()
     143             : {
                         // Debug helper, compiled only when DEBUG_chardet is defined: prints
                         // the current confidence (three decimals) and the charset name to
                         // stdout.
     144             :   printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
     145             : }
     146             : #endif
     147             : 
     148             : 

Generated by: LCOV version 1.13