Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : *
6 : * Copyright (C) 2008-2011, International Business Machines
7 : * Corporation, Google and others. All Rights Reserved.
8 : *
9 : *******************************************************************************
10 : */
11 : /*
12 : * Author : eldawy@google.com (Mohamed Eldawy)
13 : * ucnvsel.h
14 : *
15 : * Purpose: To generate a list of encodings capable of handling
16 : * a given Unicode text
17 : *
18 : * Started 09-April-2008
19 : */
20 :
21 : #ifndef __ICU_UCNV_SEL_H__
22 : #define __ICU_UCNV_SEL_H__
23 :
24 : #include "unicode/utypes.h"
25 :
26 : #if !UCONFIG_NO_CONVERSION
27 :
28 : #include "unicode/uset.h"
29 : #include "unicode/utf16.h"
30 : #include "unicode/uenum.h"
31 : #include "unicode/ucnv.h"
32 : #include "unicode/localpointer.h"
33 :
34 : /**
35 : * \file
36 : *
37 : * A converter selector is built with a set of encoding/charset names
38 : * and given an input string returns the set of names of the
39 : * corresponding converters which can convert the string.
40 : *
41 : * A converter selector can be serialized into a buffer and reopened
42 : * from the serialized form.
43 : */
44 :
45 : /**
46 : * @{
47 : * The selector data structure. Only declared (not defined) in this header, which uses it through pointers.
48 : */
49 : struct UConverterSelector;
50 : typedef struct UConverterSelector UConverterSelector;
51 : /** @} */
52 :
53 : /**
54 : * Open a selector.
55 : * If converterListSize is 0, build for all available converters.
56 : * If excludedCodePoints is NULL, don't exclude any code points.
57 : *
58 : * @param converterList a pointer to the list of encoding names to select among.
59 : * Can be NULL if converterListSize==0.
60 : * The list and the names will be cloned, and the caller
61 : * retains ownership of the original.
62 : * @param converterListSize number of encodings in the above list.
63 : * If 0, builds a selector for all available converters.
64 : * @param excludedCodePoints a set of code points to be excluded from consideration.
65 : * That is, excluded code points in a string do not change
66 : * the selection result. (They might be handled by a callback.)
67 : * Use NULL to exclude nothing.
68 : * @param whichSet which converter set to use. This determines whether
69 : * to consider only roundtrip mappings or also fallbacks.
70 : * @param status an in/out ICU UErrorCode
71 : * @return the new selector (see ucnvsel_close() for closing it)
72 : *
73 : * @stable ICU 4.2
74 : */
75 : U_STABLE UConverterSelector* U_EXPORT2
76 : ucnvsel_open(const char* const* converterList, int32_t converterListSize,
77 : const USet* excludedCodePoints,
78 : const UConverterUnicodeSet whichSet, UErrorCode* status);
79 :
80 : /**
81 : * Closes a selector.
82 : * If any enumerations were returned by ucnvsel_selectFor*, they become invalid.
83 : * They can be closed before or after calling ucnvsel_close,
84 : * but should never be used after the selector is closed.
85 : *
86 : * @see ucnvsel_selectForString
87 : * @see ucnvsel_selectForUTF8
88 : *
89 : * @param sel selector to close
90 : *
91 : * @stable ICU 4.2
92 : */
93 : U_STABLE void U_EXPORT2
94 : ucnvsel_close(UConverterSelector *sel);
95 :
96 : #if U_SHOW_CPLUSPLUS_API
97 :
98 : U_NAMESPACE_BEGIN
99 :
100 : /**
101 : * \class LocalUConverterSelectorPointer
102 : * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
103 : * For most methods see the LocalPointerBase base class.
104 : * Defined via U_DEFINE_LOCAL_OPEN_POINTER, and only when U_SHOW_CPLUSPLUS_API is set.
105 : * @see LocalPointerBase
106 : * @see LocalPointer
107 : * @stable ICU 4.4
108 : */
109 0 : U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
110 :
111 : U_NAMESPACE_END
112 :
113 : #endif /* U_SHOW_CPLUSPLUS_API */
114 :
115 : /**
116 : * Open a selector from its serialized form (see ucnvsel_serialize()).
117 : * The buffer must remain valid and unchanged for the lifetime of the selector.
118 : * This is much faster than creating a selector from scratch with ucnvsel_open().
119 : * Using a serialized form from a different machine (endianness/charset) is supported.
120 : *
121 : * @param buffer pointer to the serialized form of a converter selector;
122 : * must be 32-bit-aligned
123 : * @param length the capacity of this buffer (can be equal to or larger than
124 : * the actual data length)
125 : * @param status an in/out ICU UErrorCode
126 : * @return the new selector (see ucnvsel_close() for closing it)
127 : *
128 : * @stable ICU 4.2
129 : */
130 : U_STABLE UConverterSelector* U_EXPORT2
131 : ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
132 :
133 : /**
134 : * Serialize a selector into a linear buffer.
135 : * The serialized form is portable to different machines.
136 : *
137 : * @param sel selector to serialize
138 : * @param buffer pointer to 32-bit-aligned memory to be filled with the
139 : * serialized form of this converter selector
140 : * @param bufferCapacity the capacity of this buffer
141 : * @param status an in/out ICU UErrorCode
142 : * @return the buffer capacity required to hold the serialized data (even if the call
143 : * fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
144 : *
145 : * @stable ICU 4.2
146 : */
147 : U_STABLE int32_t U_EXPORT2
148 : ucnvsel_serialize(const UConverterSelector* sel,
149 : void* buffer, int32_t bufferCapacity, UErrorCode* status);
150 :
151 : /**
152 : * Select converters that can map all characters in a UTF-16 string,
153 : * ignoring the excluded code points.
154 : *
155 : * @param sel a selector
156 : * @param s UTF-16 string
157 : * @param length length of the string, or -1 if NUL-terminated
158 : * @param status an in/out ICU UErrorCode
159 : * @return an enumeration containing encoding names.
160 : * The returned encoding names and their order will be the same as
161 : * supplied when building the selector. Do not use the enumeration after the selector is closed.
162 : *
163 : * @stable ICU 4.2
164 : */
165 : U_STABLE UEnumeration * U_EXPORT2
166 : ucnvsel_selectForString(const UConverterSelector* sel,
167 : const UChar *s, int32_t length, UErrorCode *status);
168 :
169 : /**
170 : * Select converters that can map all characters in a UTF-8 string,
171 : * ignoring the excluded code points.
172 : *
173 : * @param sel a selector
174 : * @param s UTF-8 string
175 : * @param length length of the string, or -1 if NUL-terminated
176 : * @param status an in/out ICU UErrorCode
177 : * @return an enumeration containing encoding names.
178 : * The returned encoding names and their order will be the same as
179 : * supplied when building the selector. Do not use the enumeration after the selector is closed.
180 : *
181 : * @stable ICU 4.2
182 : */
183 : U_STABLE UEnumeration * U_EXPORT2
184 : ucnvsel_selectForUTF8(const UConverterSelector* sel,
185 : const char *s, int32_t length, UErrorCode *status);
186 :
187 : #endif /* !UCONFIG_NO_CONVERSION */
188 :
189 : #endif /* __ICU_UCNV_SEL_H__ */
|