Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : **********************************************************************
5 : * Copyright (C) 2005-2012, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : **********************************************************************
8 : */
9 :
10 : #ifndef __CSRECOG_H
11 : #define __CSRECOG_H
12 :
13 : #include "unicode/uobject.h"
14 :
15 : #if !UCONFIG_NO_CONVERSION
16 :
17 : #include "inputext.h"
18 :
19 : U_NAMESPACE_BEGIN
20 :
21 : class CharsetMatch;
22 :
23 0 : class CharsetRecognizer : public UMemory // abstract interface: getName() and match() are pure virtual
24 : {
25 : public:
26 : /**
27 : * Get the IANA name of this charset.
28 : * Note that some recognizers can recognize more than one charset, but that this API
29 : * assumes just one name per recognizer.
30 : * TODO: need to account for multiple names in public API that enumerates over the
31 : * known detectable charsets.
32 : * @return the charset name.
33 : */
34 : virtual const char *getName() const = 0;
35 :
36 : /**
37 : * Get the ISO language code for this charset.
38 : * @return the language code, or <code>null</code> if the language cannot be determined.
39 : */
40 : virtual const char *getLanguage() const; // not pure virtual; its definition is not in this header
41 :
42 : /**
43 : * Try the given input text against this Charset, and fill in the results object
44 : * with the quality of the match plus other information related to the match.
45 : *
46 : * @return TRUE if the input bytes are a potential match, and
47 : * FALSE if the input data is not compatible with, or illegal in this charset.
48 : */
49 : virtual UBool match(InputText *textIn, CharsetMatch *results) const = 0;
50 :
51 : virtual ~CharsetRecognizer(); // virtual so that deleting through a CharsetRecognizer pointer runs the derived destructor
52 : };
53 :
54 : U_NAMESPACE_END
55 :
56 : #endif
57 : #endif /* __CSRECOG_H */
|