Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : **********************************************************************
5 : * Copyright (C) 2000-2006, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : **********************************************************************
8 : * ucnv_cb.c:
9 : * External APIs for the ICU's codeset conversion library
10 : * Helena Shih
11 : *
12 : * Modification History:
13 : *
14 : * Date Name Description
15 : * 7/28/2000 srl Implementation
16 : */
17 :
18 : /**
19 : * @name Character Conversion C API
20 : *
21 : */
22 :
23 : #include "unicode/utypes.h"
24 :
25 : #if !UCONFIG_NO_CONVERSION
26 :
27 : #include "unicode/ucnv_cb.h"
28 : #include "ucnv_bld.h"
29 : #include "ucnv_cnv.h"
30 : #include "cmemory.h"
31 :
32 : /* The offsets need to be updated whenever the target pointer moves. */
33 : /* Note: Recursion may occur in the callback functions. If you do not use the ucnv_cbXXX
34 : functions, be sure to update the offsets correctly yourself. If such recursion arises,
35 : make sure the same callback is not used again within the same call stack. */
36 : U_CAPI void U_EXPORT2
37 0 : ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
38 : const char* source,
39 : int32_t length,
40 : int32_t offsetIndex,
41 : UErrorCode * err)
42 : {
43 0 : if(U_FAILURE(*err)) {
44 0 : return;
45 : }
46 :
47 0 : ucnv_fromUWriteBytes(
48 : args->converter,
49 : source, length,
50 : &args->target, args->targetLimit,
51 : &args->offsets, offsetIndex,
52 0 : err);
53 : }
54 :
55 : U_CAPI void U_EXPORT2
56 0 : ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
57 : const UChar** source,
58 : const UChar* sourceLimit,
59 : int32_t offsetIndex,
60 : UErrorCode * err)
61 : {
62 : /*
63 : This is a fun one. Recursion can occur - we're basically going to
64 : just retry shoving data through the same converter. Note, if you got
65 : here through some kind of invalid sequence, you maybe should emit a
66 : reset sequence of some kind and/or call ucnv_reset(). Since this
67 : IS an actual conversion, take care that you've changed the callback
68 : or the data, or you'll get an infinite loop.
69 :
70 : Please set the err value to something reasonable before calling
71 : into this.
72 : */
73 :
74 : char *oldTarget;
75 :
76 0 : if(U_FAILURE(*err))
77 : {
78 0 : return;
79 : }
80 :
81 0 : oldTarget = args->target;
82 :
83 0 : ucnv_fromUnicode(args->converter,
84 : &args->target,
85 : args->targetLimit,
86 : source,
87 : sourceLimit,
88 : NULL, /* no offsets */
89 : FALSE, /* no flush */
90 0 : err);
91 :
92 0 : if(args->offsets)
93 : {
94 0 : while (args->target != oldTarget) /* if it moved at all.. */
95 : {
96 0 : *(args->offsets)++ = offsetIndex;
97 0 : oldTarget++;
98 : }
99 : }
100 :
101 : /*
102 : Note, if you did something like used a Stop subcallback, things would get interesting.
103 : In fact, here's where we want to return the partially consumed in-source!
104 : */
105 0 : if(*err == U_BUFFER_OVERFLOW_ERROR)
106 : /* && (*source < sourceLimit && args->target >= args->targetLimit)
107 : -- S. Hrcek */
108 : {
109 : /* Overflowed the target. Now, we'll write into the charErrorBuffer.
110 : It's a fixed size. If we overflow it... Hmm */
111 : char *newTarget;
112 : const char *newTargetLimit;
113 0 : UErrorCode err2 = U_ZERO_ERROR;
114 :
115 : int8_t errBuffLen;
116 :
117 0 : errBuffLen = args->converter->charErrorBufferLength;
118 :
119 : /* start the new target at the first free slot in the errbuff.. */
120 0 : newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
121 :
122 0 : newTargetLimit = (char *)(args->converter->charErrorBuffer +
123 : sizeof(args->converter->charErrorBuffer));
124 :
125 0 : if(newTarget >= newTargetLimit)
126 : {
127 0 : *err = U_INTERNAL_PROGRAM_ERROR;
128 0 : return;
129 : }
130 :
131 : /* We're going to tell the converter that the errbuff len is empty.
132 : This prevents the existing errbuff from being 'flushed' out onto
133 : itself. If the errbuff is needed by the converter this time,
134 : we're hosed - we're out of space! */
135 :
136 0 : args->converter->charErrorBufferLength = 0;
137 :
138 0 : ucnv_fromUnicode(args->converter,
139 : &newTarget,
140 : newTargetLimit,
141 : source,
142 : sourceLimit,
143 : NULL,
144 : FALSE,
145 0 : &err2);
146 :
147 : /* We can go ahead and overwrite the length here. We know just how
148 : to recalculate it. */
149 :
150 0 : args->converter->charErrorBufferLength = (int8_t)(
151 0 : newTarget - (char*)args->converter->charErrorBuffer);
152 :
153 0 : if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
154 : {
155 : /* now we're REALLY in trouble.
156 : Internal program error - callback shouldn't have written this much
157 : data!
158 : */
159 0 : *err = U_INTERNAL_PROGRAM_ERROR;
160 0 : return;
161 : }
162 : /*else {*/
163 : /* sub errs could be invalid/truncated/illegal chars or w/e.
164 : These might want to be passed on up.. But the problem is, we already
165 : need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
166 : other errs.. */
167 :
168 : /*
169 : if(U_FAILURE(err2))
170 : ??
171 : */
172 : /*}*/
173 : }
174 : }
175 :
176 : U_CAPI void U_EXPORT2
177 0 : ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
178 : int32_t offsetIndex,
179 : UErrorCode * err)
180 : {
181 : UConverter *converter;
182 : int32_t length;
183 :
184 0 : if(U_FAILURE(*err)) {
185 0 : return;
186 : }
187 0 : converter = args->converter;
188 0 : length = converter->subCharLen;
189 :
190 0 : if(length == 0) {
191 0 : return;
192 : }
193 :
194 0 : if(length < 0) {
195 : /*
196 : * Write/convert the substitution string. Its real length is -length.
197 : * Unlike the escape callback, we need not change the converter's
198 : * callback function because ucnv_setSubstString() verified that
199 : * the string can be converted, so we will not get a conversion error
200 : * and will not recurse.
201 : * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
202 : */
203 0 : const UChar *source = (const UChar *)converter->subChars;
204 0 : ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
205 0 : return;
206 : }
207 :
208 0 : if(converter->sharedData->impl->writeSub!=NULL) {
209 0 : converter->sharedData->impl->writeSub(args, offsetIndex, err);
210 : }
211 0 : else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
212 : /*
213 : TODO: Is this untestable because the MBCS converter has a writeSub function to call
214 : and the other converters don't use subChar1?
215 : */
216 : ucnv_cbFromUWriteBytes(args,
217 0 : (const char *)&converter->subChar1, 1,
218 0 : offsetIndex, err);
219 : }
220 : else {
221 : ucnv_cbFromUWriteBytes(args,
222 0 : (const char *)converter->subChars, length,
223 0 : offsetIndex, err);
224 : }
225 : }
226 :
227 : U_CAPI void U_EXPORT2
228 0 : ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
229 : const UChar* source,
230 : int32_t length,
231 : int32_t offsetIndex,
232 : UErrorCode * err)
233 : {
234 0 : if(U_FAILURE(*err)) {
235 0 : return;
236 : }
237 :
238 0 : ucnv_toUWriteUChars(
239 : args->converter,
240 : source, length,
241 : &args->target, args->targetLimit,
242 : &args->offsets, offsetIndex,
243 0 : err);
244 : }
245 :
246 : U_CAPI void U_EXPORT2
247 0 : ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
248 : int32_t offsetIndex,
249 : UErrorCode * err)
250 : {
251 : static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
252 :
253 : /* could optimize this case, just one uchar */
254 0 : if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
255 0 : ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
256 : } else {
257 0 : ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
258 : }
259 0 : }
260 :
261 : #endif
|