Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2010-2012, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * file name: idna.h
9 : * encoding: UTF-8
10 : * tab size: 8 (not used)
11 : * indentation:4
12 : *
13 : * created on: 2010mar05
14 : * created by: Markus W. Scherer
15 : */
16 :
17 : #ifndef __IDNA_H__
18 : #define __IDNA_H__
19 :
20 : /**
21 : * \file
22 : * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23 : */
24 :
25 : #include "unicode/utypes.h"
26 :
27 : #if !UCONFIG_NO_IDNA
28 :
29 : #include "unicode/bytestream.h"
30 : #include "unicode/stringpiece.h"
31 : #include "unicode/uidna.h"
32 : #include "unicode/unistr.h"
33 :
34 : U_NAMESPACE_BEGIN
35 :
36 : class IDNAInfo;
37 :
38 : /**
39 : * Abstract base class for IDNA processing.
40 : * See http://www.unicode.org/reports/tr46/
41 : * and http://www.ietf.org/rfc/rfc3490.txt
42 : *
43 : * The IDNA class is not intended for public subclassing.
44 : *
45 : * This C++ API currently only implements UTS #46.
46 : * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
47 : * and IDNA2003 (functions that do not use a service object).
48 : * @stable ICU 4.6
49 : */
50 3 : class U_COMMON_API IDNA : public UObject {
51 : public:
52 : /**
53 : * Destructor.
54 : * @stable ICU 4.6
55 : */
56 : ~IDNA();
57 :
58 : /**
59 : * Returns an IDNA instance which implements UTS #46.
60 : * Returns an unmodifiable instance, owned by the caller.
61 : * Cache it for multiple operations, and delete it when done.
62 : * The instance is thread-safe, that is, it can be used concurrently.
63 : *
64 : * UTS #46 defines Unicode IDNA Compatibility Processing,
65 : * updated to the latest version of Unicode and compatible with both
66 : * IDNA2003 and IDNA2008.
67 : *
68 : * The worker functions use transitional processing, including deviation mappings,
69 : * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
70 : * is used in which case the deviation characters are passed through without change.
71 : *
72 : * Disallowed characters are mapped to U+FFFD.
73 : *
74 : * For available options see the uidna.h header.
75 : * Operations with the UTS #46 instance do not support the
76 : * UIDNA_ALLOW_UNASSIGNED option.
77 : *
78 : * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
79 : * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
80 : * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
81 : *
82 : * @param options Bit set to modify the processing and error checking.
83 : * See option bit set values in uidna.h.
84 : * @param errorCode Standard ICU error code. Its input value must
85 : * pass the U_SUCCESS() test, or else the function returns
86 : * immediately. Check for U_FAILURE() on output or use with
87 : * function chaining. (See User Guide for details.)
88 : * @return the UTS #46 IDNA instance, if successful
89 : * @stable ICU 4.6
90 : */
91 : static IDNA *
92 : createUTS46Instance(uint32_t options, UErrorCode &errorCode);
93 :
94 : /**
95 : * Converts a single domain name label into its ASCII form for DNS lookup.
96 : * If any processing step fails, then info.hasErrors() will be TRUE and
97 : * the result might not be an ASCII string.
98 : * The label might be modified according to the types of errors.
99 : * Labels with severe errors will be left in (or turned into) their Unicode form.
100 : *
101 : * The UErrorCode indicates an error only in exceptional cases,
102 : * such as a U_MEMORY_ALLOCATION_ERROR.
103 : *
104 : * @param label Input domain name label
105 : * @param dest Destination string object
106 : * @param info Output container of IDNA processing details.
107 : * @param errorCode Standard ICU error code. Its input value must
108 : * pass the U_SUCCESS() test, or else the function returns
109 : * immediately. Check for U_FAILURE() on output or use with
110 : * function chaining. (See User Guide for details.)
111 : * @return dest
112 : * @stable ICU 4.6
113 : */
114 : virtual UnicodeString &
115 : labelToASCII(const UnicodeString &label, UnicodeString &dest,
116 : IDNAInfo &info, UErrorCode &errorCode) const = 0;
117 :
118 : /**
119 : * Converts a single domain name label into its Unicode form for human-readable display.
120 : * If any processing step fails, then info.hasErrors() will be TRUE.
121 : * The label might be modified according to the types of errors.
122 : *
123 : * The UErrorCode indicates an error only in exceptional cases,
124 : * such as a U_MEMORY_ALLOCATION_ERROR.
125 : *
126 : * @param label Input domain name label
127 : * @param dest Destination string object
128 : * @param info Output container of IDNA processing details.
129 : * @param errorCode Standard ICU error code. Its input value must
130 : * pass the U_SUCCESS() test, or else the function returns
131 : * immediately. Check for U_FAILURE() on output or use with
132 : * function chaining. (See User Guide for details.)
133 : * @return dest
134 : * @stable ICU 4.6
135 : */
136 : virtual UnicodeString &
137 : labelToUnicode(const UnicodeString &label, UnicodeString &dest,
138 : IDNAInfo &info, UErrorCode &errorCode) const = 0;
139 :
140 : /**
141 : * Converts a whole domain name into its ASCII form for DNS lookup.
142 : * If any processing step fails, then info.hasErrors() will be TRUE and
143 : * the result might not be an ASCII string.
144 : * The domain name might be modified according to the types of errors.
145 : * Labels with severe errors will be left in (or turned into) their Unicode form.
146 : *
147 : * The UErrorCode indicates an error only in exceptional cases,
148 : * such as a U_MEMORY_ALLOCATION_ERROR.
149 : *
150 : * @param name Input domain name
151 : * @param dest Destination string object
152 : * @param info Output container of IDNA processing details.
153 : * @param errorCode Standard ICU error code. Its input value must
154 : * pass the U_SUCCESS() test, or else the function returns
155 : * immediately. Check for U_FAILURE() on output or use with
156 : * function chaining. (See User Guide for details.)
157 : * @return dest
158 : * @stable ICU 4.6
159 : */
160 : virtual UnicodeString &
161 : nameToASCII(const UnicodeString &name, UnicodeString &dest,
162 : IDNAInfo &info, UErrorCode &errorCode) const = 0;
163 :
164 : /**
165 : * Converts a whole domain name into its Unicode form for human-readable display.
166 : * If any processing step fails, then info.hasErrors() will be TRUE.
167 : * The domain name might be modified according to the types of errors.
168 : *
169 : * The UErrorCode indicates an error only in exceptional cases,
170 : * such as a U_MEMORY_ALLOCATION_ERROR.
171 : *
172 : * @param name Input domain name
173 : * @param dest Destination string object
174 : * @param info Output container of IDNA processing details.
175 : * @param errorCode Standard ICU error code. Its input value must
176 : * pass the U_SUCCESS() test, or else the function returns
177 : * immediately. Check for U_FAILURE() on output or use with
178 : * function chaining. (See User Guide for details.)
179 : * @return dest
180 : * @stable ICU 4.6
181 : */
182 : virtual UnicodeString &
183 : nameToUnicode(const UnicodeString &name, UnicodeString &dest,
184 : IDNAInfo &info, UErrorCode &errorCode) const = 0;
185 :
186 : // UTF-8 versions of the processing methods (virtual, but not pure) ----- ***
187 :
188 : /**
189 : * Converts a single domain name label into its ASCII form for DNS lookup.
190 : * UTF-8 version of labelToASCII(), same behavior.
191 : *
192 : * @param label Input domain name label
193 : * @param dest Destination byte sink; Flush()ed if successful
194 : * @param info Output container of IDNA processing details.
195 : * @param errorCode Standard ICU error code. Its input value must
196 : * pass the U_SUCCESS() test, or else the function returns
197 : * immediately. Check for U_FAILURE() on output or use with
198 : * function chaining. (See User Guide for details.)
199 : * (Declared void: nothing is returned; the result is delivered through dest.)
200 : * @stable ICU 4.6
201 : */
202 : virtual void
203 : labelToASCII_UTF8(StringPiece label, ByteSink &dest,
204 : IDNAInfo &info, UErrorCode &errorCode) const;
205 :
206 : /**
207 : * Converts a single domain name label into its Unicode form for human-readable display.
208 : * UTF-8 version of labelToUnicode(), same behavior.
209 : *
210 : * @param label Input domain name label
211 : * @param dest Destination byte sink; Flush()ed if successful
212 : * @param info Output container of IDNA processing details.
213 : * @param errorCode Standard ICU error code. Its input value must
214 : * pass the U_SUCCESS() test, or else the function returns
215 : * immediately. Check for U_FAILURE() on output or use with
216 : * function chaining. (See User Guide for details.)
217 : * (Declared void: nothing is returned; the result is delivered through dest.)
218 : * @stable ICU 4.6
219 : */
220 : virtual void
221 : labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
222 : IDNAInfo &info, UErrorCode &errorCode) const;
223 :
224 : /**
225 : * Converts a whole domain name into its ASCII form for DNS lookup.
226 : * UTF-8 version of nameToASCII(), same behavior.
227 : *
228 : * @param name Input domain name
229 : * @param dest Destination byte sink; Flush()ed if successful
230 : * @param info Output container of IDNA processing details.
231 : * @param errorCode Standard ICU error code. Its input value must
232 : * pass the U_SUCCESS() test, or else the function returns
233 : * immediately. Check for U_FAILURE() on output or use with
234 : * function chaining. (See User Guide for details.)
235 : * (Declared void: nothing is returned; the result is delivered through dest.)
236 : * @stable ICU 4.6
237 : */
238 : virtual void
239 : nameToASCII_UTF8(StringPiece name, ByteSink &dest,
240 : IDNAInfo &info, UErrorCode &errorCode) const;
241 :
242 : /**
243 : * Converts a whole domain name into its Unicode form for human-readable display.
244 : * UTF-8 version of nameToUnicode(), same behavior.
245 : *
246 : * @param name Input domain name
247 : * @param dest Destination byte sink; Flush()ed if successful
248 : * @param info Output container of IDNA processing details.
249 : * @param errorCode Standard ICU error code. Its input value must
250 : * pass the U_SUCCESS() test, or else the function returns
251 : * immediately. Check for U_FAILURE() on output or use with
252 : * function chaining. (See User Guide for details.)
253 : * (Declared void: nothing is returned; the result is delivered through dest.)
254 : * @stable ICU 4.6
255 : */
256 : virtual void
257 : nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
258 : IDNAInfo &info, UErrorCode &errorCode) const;
259 : };
260 :
261 : class UTS46;
262 :
263 : /**
264 : * Output container for IDNA processing errors.
265 : * The IDNAInfo class is not suitable for subclassing.
266 : * @stable ICU 4.6
267 : */
268 : class U_COMMON_API IDNAInfo : public UMemory {
269 : public:
270 : /**
271 : * Constructor for stack allocation; the new object records no errors.
272 : * @stable ICU 4.6
273 : */
274 0 : IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
275 : /**
276 : * Were there IDNA processing errors?
277 : * @return TRUE if there were processing errors (getErrors() is nonzero)
278 : * @stable ICU 4.6
279 : */
280 : UBool hasErrors() const { return errors!=0; }
281 : /**
282 : * Returns a bit set indicating IDNA processing errors.
283 : * See UIDNA_ERROR_... constants in uidna.h.
284 : * @return bit set of processing errors
285 : * @stable ICU 4.6
286 : */
287 0 : uint32_t getErrors() const { return errors; }
288 : /**
289 : * Returns TRUE if transitional and nontransitional processing produce different results.
290 : * This is the case when the input label or domain name contains
291 : * one or more deviation characters outside a Punycode label (see UTS #46).
292 : * <ul>
293 : * <li>With nontransitional processing, such characters are
294 : * copied to the destination string.
295 : * <li>With transitional processing, such characters are
296 : * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
297 : * </ul>
298 : * @return TRUE if transitional and nontransitional processing produce different results
299 : * @stable ICU 4.6
300 : */
301 0 : UBool isTransitionalDifferent() const { return isTransDiff; }
302 :
303 : private:
304 : friend class UTS46; // gives UTS46 access to reset() and the private fields below
305 :
306 : IDNAInfo(const IDNAInfo &other); // private copy constructor: no copying
307 : IDNAInfo &operator=(const IDNAInfo &other); // private assignment operator: no copying
308 :
309 0 : void reset() { // restores the same field values that the constructor sets
310 0 : errors=labelErrors=0;
311 0 : isTransDiff=FALSE;
312 0 : isBiDi=FALSE;
313 0 : isOkBiDi=TRUE;
314 0 : }
315 :
316 : uint32_t errors, labelErrors; // errors backs hasErrors()/getErrors(); labelErrors has no accessor in this class
317 : UBool isTransDiff; // value returned by isTransitionalDifferent()
318 : UBool isBiDi; // no accessor in this class; presumably maintained by UTS46 -- TODO confirm
319 : UBool isOkBiDi; // no accessor in this class; the only field that starts out (and resets to) TRUE
320 : };
321 :
322 : U_NAMESPACE_END
323 :
324 : #endif // UCONFIG_NO_IDNA
325 : #endif // __IDNA_H__
|