Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #ifndef __JPCNTX_H__
7 : #define __JPCNTX_H__
8 :
// Number of frequency categories; this is the length of the per-category
// counter array JapaneseContextAnalysis::mRelSample.
#define NUM_OF_CATEGORY 6
10 :
11 : #include "nscore.h"
12 :
// GotEnoughData() reports true once strictly more than this many two-character
// sequences have been counted.
#define ENOUGH_REL_THRESHOLD 100
// HandleOneChar() marks the analysis as done (and stops counting) once strictly
// more than this many two-character sequences have been counted.
#define MAX_REL_THRESHOLD 1000

// Hiragana frequency category table, indexed as
// [order of previous character][order of current character]. Each entry is
// used as an index into the per-category counters (mRelSample). The dimension
// 83 matches the range of orders (0..82) that the single-argument GetOrder()
// implementations in this header can return.
extern const uint8_t jp2CharContext[83][83];
18 :
19 : class JapaneseContextAnalysis
20 : {
21 : public:
22 0 : JapaneseContextAnalysis() {Reset();}
23 :
24 : void HandleData(const char* aBuf, uint32_t aLen);
25 :
26 0 : void HandleOneChar(const char* aStr, uint32_t aCharLen)
27 : {
28 : int32_t order;
29 :
30 : //if we received enough data, stop here
31 0 : if (mTotalRel > MAX_REL_THRESHOLD) mDone = true;
32 0 : if (mDone) return;
33 :
34 : //Only 2-bytes characters are of our interest
35 0 : order = (aCharLen == 2) ? GetOrder(aStr) : -1;
36 0 : if (order != -1 && mLastCharOrder != -1)
37 : {
38 0 : mTotalRel++;
39 : //count this sequence to its category counter
40 0 : mRelSample[jp2CharContext[mLastCharOrder][order]]++;
41 : }
42 0 : mLastCharOrder = order;
43 : }
44 :
45 : float GetConfidence(void);
46 : void Reset();
47 0 : bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;}
48 :
49 : protected:
50 : virtual int32_t GetOrder(const char* str, uint32_t *charLen) = 0;
51 : virtual int32_t GetOrder(const char* str) = 0;
52 :
53 : //category counters, each integer counts sequences in its category
54 : uint32_t mRelSample[NUM_OF_CATEGORY];
55 :
56 : //total sequence received
57 : uint32_t mTotalRel;
58 :
59 : //Number of sequences needed to trigger detection
60 : uint32_t mDataThreshold;
61 :
62 : //The order of previous char
63 : int32_t mLastCharOrder;
64 :
65 : //if last byte in current buffer is not the last byte of a character, we
66 : //need to know how many byte to skip in next buffer.
67 : uint32_t mNeedToSkipCharNum;
68 :
69 : //If this flag is set to true, detection is done and conclusion has been made
70 : bool mDone;
71 : };
72 :
73 :
74 0 : class SJISContextAnalysis : public JapaneseContextAnalysis
75 : {
76 : //SJISContextAnalysis(){};
77 : protected:
78 : int32_t GetOrder(const char* str, uint32_t *charLen);
79 :
80 0 : int32_t GetOrder(const char* str)
81 : {
82 : //We only interested in Hiragana, so first byte is '\202'
83 0 : if (*str == '\202' &&
84 0 : (unsigned char)*(str+1) >= (unsigned char)0x9f &&
85 0 : (unsigned char)*(str+1) <= (unsigned char)0xf1)
86 0 : return (unsigned char)*(str+1) - (unsigned char)0x9f;
87 0 : return -1;
88 : }
89 : };
90 :
91 0 : class EUCJPContextAnalysis : public JapaneseContextAnalysis
92 : {
93 : protected:
94 : int32_t GetOrder(const char* str, uint32_t *charLen);
95 0 : int32_t GetOrder(const char* str)
96 : //We only interested in Hiragana, so first byte is '\244'
97 : {
98 0 : if (*str == '\244' &&
99 0 : (unsigned char)*(str+1) >= (unsigned char)0xa1 &&
100 0 : (unsigned char)*(str+1) <= (unsigned char)0xf3)
101 0 : return (unsigned char)*(str+1) - (unsigned char)0xa1;
102 0 : return -1;
103 : }
104 : };
105 :
106 : #endif /* __JPCNTX_H__ */
107 :
|