Line data Source code
1 : /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
// For the S-JIS encoding, we observe the following characteristics:
// 1. Kana characters (or hankaku?) often have a high frequency of appearance.
// 2. Kana characters often appear in groups.
// 3. Certain combinations of kana are never used in the Japanese language.
10 :
11 : #include "nsSJISProber.h"
12 : #include "nsDebug.h"
13 :
14 0 : void nsSJISProber::Reset(void)
15 : {
16 0 : mCodingSM->Reset();
17 0 : mState = eDetecting;
18 0 : mContextAnalyser.Reset();
19 0 : mDistributionAnalyser.Reset();
20 0 : }
21 :
22 0 : nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen)
23 : {
24 0 : NS_ASSERTION(aLen, "HandleData called with empty buffer");
25 : nsSMState codingState;
26 :
27 0 : for (uint32_t i = 0; i < aLen; i++)
28 : {
29 0 : codingState = mCodingSM->NextState(aBuf[i]);
30 0 : if (codingState == eItsMe)
31 : {
32 0 : mState = eFoundIt;
33 0 : break;
34 : }
35 0 : if (codingState == eStart)
36 : {
37 0 : uint32_t charLen = mCodingSM->GetCurrentCharLen();
38 0 : if (i == 0)
39 : {
40 0 : mLastChar[1] = aBuf[0];
41 0 : mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen);
42 0 : mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
43 : }
44 : else
45 : {
46 0 : mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen);
47 0 : mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
48 : }
49 : }
50 : }
51 :
52 0 : mLastChar[0] = aBuf[aLen-1];
53 :
54 0 : if (mState == eDetecting)
55 0 : if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
56 0 : mState = eFoundIt;
57 :
58 0 : return mState;
59 : }
60 :
61 0 : float nsSJISProber::GetConfidence(void)
62 : {
63 0 : float contxtCf = mContextAnalyser.GetConfidence();
64 0 : float distribCf = mDistributionAnalyser.GetConfidence();
65 :
66 0 : return (contxtCf > distribCf ? contxtCf : distribCf);
67 : }
68 :
|