Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : **********************************************************************
5 : * Copyright (C) 2005-2015, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : **********************************************************************
8 : */
9 :
10 : #ifndef __CSR2022_H
11 : #define __CSR2022_H
12 :
13 : #include "unicode/utypes.h"
14 :
15 : #if !UCONFIG_NO_CONVERSION
16 :
17 : #include "csrecog.h"
18 :
19 : U_NAMESPACE_BEGIN
20 :
21 : class CharsetMatch;
22 :
23 : /**
24 : * class CharsetRecog_2022 part of the ICU charset detection implementation.
25 : * This is a superclass for the individual detectors for
26 : * each of the detectable members of the ISO 2022 family
27 : * of encodings.
28 : *
29 : * The separate detector classes are declared below and derive from this class.
30 : *
31 : * @internal
32 : */
33 0 : class CharsetRecog_2022 : public CharsetRecognizer
34 : {
35 : // Abstract base: the destructor below is declared pure virtual (= 0).
36 : public:
37 : virtual ~CharsetRecog_2022() = 0;
38 : // match_2022 is protected: callable only from this class and the classes derived from it.
39 : protected:
40 :
41 : /**
42 : * Matching function shared among the 2022 detectors JP, CN and KR.
43 : * Counts up the number of legal and unrecognized escape sequences in
44 : * the sample of text, and computes a score based on the total number &
45 : * the proportion that fit the encoding.
46 : *
47 : * @param text the byte buffer containing text to analyse
48 : * @param textLen the size of the text, in bytes.
49 : * @param escapeSequences the byte escape sequences to test for; each entry is a row of 5 bytes.
50 : * @param escapeSequences_length presumably the number of rows in escapeSequences (TODO confirm against the definition).
51 : * @return match quality, in the range of 0-100.
52 : */
53 : int32_t match_2022(const uint8_t *text,
54 : int32_t textLen,
55 : const uint8_t escapeSequences[][5],
56 : int32_t escapeSequences_length) const;
57 :
58 : };
59 :
60 0 : class CharsetRecog_2022JP :public CharsetRecog_2022
61 : { // Subclass of CharsetRecog_2022; going by its name, the JP member of the 2022 detectors.
62 : public:
63 : virtual ~CharsetRecog_2022JP();
64 : // NOTE(review): presumably returns the name of the charset this detector reports; defined outside this header.
65 : const char *getName() const;
66 : // NOTE(review): presumably examines textIn and records the outcome in results; the meaning of the UBool result is not visible here.
67 : UBool match(InputText *textIn, CharsetMatch *results) const;
68 : };
69 :
70 : #if !UCONFIG_ONLY_HTML_CONVERSION
71 0 : class CharsetRecog_2022KR :public CharsetRecog_2022 { // Going by its name, the KR member of the 2022 detectors; only declared when !UCONFIG_ONLY_HTML_CONVERSION.
72 : public:
73 : virtual ~CharsetRecog_2022KR();
74 : // NOTE(review): presumably returns the name of the charset this detector reports; defined outside this header.
75 : const char *getName() const;
76 : // NOTE(review): presumably examines textIn and records the outcome in results; the meaning of the UBool result is not visible here.
77 : UBool match(InputText *textIn, CharsetMatch *results) const;
78 :
79 : };
80 :
81 0 : class CharsetRecog_2022CN :public CharsetRecog_2022
82 : { // Subclass of CharsetRecog_2022; going by its name, the CN member of the 2022 detectors. Only declared when !UCONFIG_ONLY_HTML_CONVERSION.
83 : public:
84 : virtual ~CharsetRecog_2022CN();
85 : // NOTE(review): presumably returns the name of the charset this detector reports; defined outside this header.
86 : const char* getName() const;
87 : // NOTE(review): presumably examines textIn and records the outcome in results; the meaning of the UBool result is not visible here.
88 : UBool match(InputText *textIn, CharsetMatch *results) const;
89 : };
90 : #endif
91 :
92 : U_NAMESPACE_END
93 :
94 : #endif
95 : #endif /* __CSR2022_H */
|