Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : *
6 : * Copyright (C) 2003-2015, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : *******************************************************************************
10 : * file name: ucol_swp.cpp
11 : * encoding: UTF-8
12 : * tab size: 8 (not used)
13 : * indentation:4
14 : *
15 : * created on: 2003sep10
16 : * created by: Markus W. Scherer
17 : *
18 : * Swap collation binaries.
19 : */
20 :
21 : #include "unicode/udata.h" /* UDataInfo */
22 : #include "utrie.h"
23 : #include "utrie2.h"
24 : #include "udataswp.h"
25 : #include "cmemory.h"
26 : #include "ucol_data.h"
27 : #include "ucol_swp.h"
28 :
29 : /* swapping ----------------------------------------------------------------- */
30 :
31 : /*
32 : * This performs data swapping for a folded trie (see utrie.c for details).
33 : */
34 :
35 : U_CAPI int32_t U_EXPORT2
36 0 : utrie_swap(const UDataSwapper *ds,
37 : const void *inData, int32_t length, void *outData,
38 : UErrorCode *pErrorCode) {
39 : const UTrieHeader *inTrie;
40 : UTrieHeader trie;
41 : int32_t size;
42 : UBool dataIs32;
43 :
44 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
45 0 : return 0;
46 : }
47 0 : if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
48 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
49 0 : return 0;
50 : }
51 :
52 : /* setup and swapping */
53 0 : if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
54 0 : *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
55 0 : return 0;
56 : }
57 :
58 0 : inTrie=(const UTrieHeader *)inData;
59 0 : trie.signature=ds->readUInt32(inTrie->signature);
60 0 : trie.options=ds->readUInt32(inTrie->options);
61 0 : trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
62 0 : trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
63 :
64 0 : if( trie.signature!=0x54726965 ||
65 0 : (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
66 0 : ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
67 0 : trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
68 0 : (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
69 0 : trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
70 0 : (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
71 0 : ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
72 : ) {
73 0 : *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
74 0 : return 0;
75 : }
76 :
77 0 : dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
78 0 : size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
79 :
80 0 : if(length>=0) {
81 : UTrieHeader *outTrie;
82 :
83 0 : if(length<size) {
84 0 : *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
85 0 : return 0;
86 : }
87 :
88 0 : outTrie=(UTrieHeader *)outData;
89 :
90 : /* swap the header */
91 0 : ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
92 :
93 : /* swap the index and the data */
94 0 : if(dataIs32) {
95 0 : ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
96 0 : ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
97 0 : (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
98 : } else {
99 0 : ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
100 : }
101 : }
102 :
103 0 : return size;
104 : }
105 :
106 : #if !UCONFIG_NO_COLLATION
107 :
108 : U_CAPI UBool U_EXPORT2
109 0 : ucol_looksLikeCollationBinary(const UDataSwapper *ds,
110 : const void *inData, int32_t length) {
111 0 : if(ds==NULL || inData==NULL || length<-1) {
112 0 : return FALSE;
113 : }
114 :
115 : // First check for format version 4+ which has a standard data header.
116 0 : UErrorCode errorCode=U_ZERO_ERROR;
117 0 : (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode);
118 0 : if(U_SUCCESS(errorCode)) {
119 0 : const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
120 0 : if(info.dataFormat[0]==0x55 && // dataFormat="UCol"
121 0 : info.dataFormat[1]==0x43 &&
122 0 : info.dataFormat[2]==0x6f &&
123 0 : info.dataFormat[3]==0x6c) {
124 0 : return TRUE;
125 : }
126 : }
127 :
128 : // Else check for format version 3.
129 0 : const UCATableHeader *inHeader=(const UCATableHeader *)inData;
130 :
131 : /*
132 : * The collation binary must contain at least the UCATableHeader,
133 : * starting with its size field.
134 : * sizeof(UCATableHeader)==42*4 in ICU 2.8
135 : * check the length against the header size before reading the size field
136 : */
137 : UCATableHeader header;
138 0 : uprv_memset(&header, 0, sizeof(header));
139 0 : if(length<0) {
140 0 : header.size=udata_readInt32(ds, inHeader->size);
141 0 : } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
142 0 : return FALSE;
143 : }
144 :
145 0 : header.magic=ds->readUInt32(inHeader->magic);
146 0 : if(!(
147 0 : header.magic==UCOL_HEADER_MAGIC &&
148 0 : inHeader->formatVersion[0]==3 /*&&
149 : inHeader->formatVersion[1]>=0*/
150 : )) {
151 0 : return FALSE;
152 : }
153 :
154 0 : if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
155 0 : return FALSE;
156 : }
157 :
158 0 : return TRUE;
159 : }
160 :
161 : namespace {
162 :
163 : /* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */
164 : int32_t
165 0 : swapFormatVersion3(const UDataSwapper *ds,
166 : const void *inData, int32_t length, void *outData,
167 : UErrorCode *pErrorCode) {
168 : const uint8_t *inBytes;
169 : uint8_t *outBytes;
170 :
171 : const UCATableHeader *inHeader;
172 : UCATableHeader *outHeader;
173 : UCATableHeader header;
174 :
175 : uint32_t count;
176 :
177 : /* argument checking in case we were not called from ucol_swap() */
178 0 : if(U_FAILURE(*pErrorCode)) {
179 0 : return 0;
180 : }
181 0 : if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
182 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
183 0 : return 0;
184 : }
185 :
186 0 : inBytes=(const uint8_t *)inData;
187 0 : outBytes=(uint8_t *)outData;
188 :
189 0 : inHeader=(const UCATableHeader *)inData;
190 0 : outHeader=(UCATableHeader *)outData;
191 :
192 : /*
193 : * The collation binary must contain at least the UCATableHeader,
194 : * starting with its size field.
195 : * sizeof(UCATableHeader)==42*4 in ICU 2.8
196 : * check the length against the header size before reading the size field
197 : */
198 0 : uprv_memset(&header, 0, sizeof(header));
199 0 : if(length<0) {
200 0 : header.size=udata_readInt32(ds, inHeader->size);
201 0 : } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
202 : udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
203 0 : length);
204 0 : *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
205 0 : return 0;
206 : }
207 :
208 0 : header.magic=ds->readUInt32(inHeader->magic);
209 0 : if(!(
210 0 : header.magic==UCOL_HEADER_MAGIC &&
211 0 : inHeader->formatVersion[0]==3 /*&&
212 : inHeader->formatVersion[1]>=0*/
213 : )) {
214 0 : udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
215 : header.magic,
216 0 : inHeader->formatVersion[0], inHeader->formatVersion[1]);
217 0 : *pErrorCode=U_UNSUPPORTED_ERROR;
218 0 : return 0;
219 : }
220 :
221 0 : if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
222 0 : udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
223 0 : inHeader->isBigEndian, inHeader->charSetFamily);
224 0 : *pErrorCode=U_INVALID_FORMAT_ERROR;
225 0 : return 0;
226 : }
227 :
228 0 : if(length>=0) {
229 : /* copy everything, takes care of data that needs no swapping */
230 0 : if(inBytes!=outBytes) {
231 0 : uprv_memcpy(outBytes, inBytes, header.size);
232 : }
233 :
234 : /* swap the necessary pieces in the order of their occurrence in the data */
235 :
236 : /* read more of the UCATableHeader (the size field was read above) */
237 0 : header.options= ds->readUInt32(inHeader->options);
238 0 : header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);
239 0 : header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);
240 0 : header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);
241 0 : header.expansion= ds->readUInt32(inHeader->expansion);
242 0 : header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);
243 0 : header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);
244 0 : header.contractionSize= ds->readUInt32(inHeader->contractionSize);
245 0 : header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);
246 0 : header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);
247 0 : header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);
248 0 : header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
249 0 : header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);
250 0 : header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);
251 :
252 : /* swap the 32-bit integers in the header */
253 0 : ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
254 0 : outHeader, pErrorCode);
255 0 : ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
256 0 : &(outHeader->scriptToLeadByte), pErrorCode);
257 : /* set the output platform properties */
258 0 : outHeader->isBigEndian=ds->outIsBigEndian;
259 0 : outHeader->charSetFamily=ds->outCharset;
260 :
261 : /* swap the options */
262 0 : if(header.options!=0) {
263 0 : ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
264 0 : outBytes+header.options, pErrorCode);
265 : }
266 :
267 : /* swap the expansions */
268 0 : if(header.mappingPosition!=0 && header.expansion!=0) {
269 0 : if(header.contractionIndex!=0) {
270 : /* expansions bounded by contractions */
271 0 : count=header.contractionIndex-header.expansion;
272 : } else {
273 : /* no contractions: expansions bounded by the main trie */
274 0 : count=header.mappingPosition-header.expansion;
275 : }
276 0 : ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
277 0 : outBytes+header.expansion, pErrorCode);
278 : }
279 :
280 : /* swap the contractions */
281 0 : if(header.contractionSize!=0) {
282 : /* contractionIndex: UChar[] */
283 0 : ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
284 0 : outBytes+header.contractionIndex, pErrorCode);
285 :
286 : /* contractionCEs: CEs[] */
287 0 : ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
288 0 : outBytes+header.contractionCEs, pErrorCode);
289 : }
290 :
291 : /* swap the main trie */
292 0 : if(header.mappingPosition!=0) {
293 0 : count=header.endExpansionCE-header.mappingPosition;
294 0 : utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
295 0 : outBytes+header.mappingPosition, pErrorCode);
296 : }
297 :
298 : /* swap the max expansion table */
299 0 : if(header.endExpansionCECount!=0) {
300 0 : ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
301 0 : outBytes+header.endExpansionCE, pErrorCode);
302 : }
303 :
304 : /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
305 :
306 : /* swap UCA constants */
307 0 : if(header.UCAConsts!=0) {
308 : /*
309 : * if UCAConsts!=0 then contractionUCACombos because we are swapping
310 : * the UCA data file, and we know that the UCA contains contractions
311 : */
312 0 : ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
313 0 : outBytes+header.UCAConsts, pErrorCode);
314 : }
315 :
316 : /* swap UCA contractions */
317 0 : if(header.contractionUCACombosSize!=0) {
318 0 : count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
319 0 : ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
320 0 : outBytes+header.contractionUCACombos, pErrorCode);
321 : }
322 :
323 : /* swap the script to lead bytes */
324 0 : if(header.scriptToLeadByte!=0) {
325 0 : int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
326 0 : int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
327 0 : ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
328 0 : 4 + (4 * indexCount) + (2 * dataCount),
329 0 : outBytes+header.scriptToLeadByte, pErrorCode);
330 : }
331 :
332 : /* swap the lead byte to scripts */
333 0 : if(header.leadByteToScript!=0) {
334 0 : int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
335 0 : int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
336 0 : ds->swapArray16(ds, inBytes+header.leadByteToScript,
337 0 : 4 + (2 * indexCount) + (2 * dataCount),
338 0 : outBytes+header.leadByteToScript, pErrorCode);
339 : }
340 : }
341 :
342 0 : return header.size;
343 : }
344 :
345 : // swap formatVersion 4 or 5 ----------------------------------------------- ***
346 :
347 : // The following are copied from CollationDataReader, trading an awkward copy of constants
348 : // for an awkward relocation of the i18n collationdatareader.h file into the common library.
349 : // Keep them in sync!
350 :
// Slots of the int32_t indexes[] array at the start of the collation data.
// Slot IX_INDEXES_LENGTH holds the number of indexes. The *_OFFSET slots hold
// byte offsets; swapFormatVersion4() takes the length of a part to be the
// difference between its offset and the offset stored in the next slot.
enum {
    IX_INDEXES_LENGTH=0,
    IX_OPTIONS=1,
    IX_RESERVED2=2,
    IX_RESERVED3=3,

    IX_JAMO_CE32S_START=4,
    IX_REORDER_CODES_OFFSET=5,
    IX_REORDER_TABLE_OFFSET=6,
    IX_TRIE_OFFSET=7,

    IX_RESERVED8_OFFSET=8,
    IX_CES_OFFSET=9,
    IX_RESERVED10_OFFSET=10,
    IX_CE32S_OFFSET=11,

    IX_ROOT_ELEMENTS_OFFSET=12,
    IX_CONTEXTS_OFFSET=13,
    IX_UNSAFE_BWD_OFFSET=14,
    IX_FAST_LATIN_TABLE_OFFSET=15,

    IX_SCRIPTS_OFFSET=16,
    IX_COMPRESSIBLE_BYTES_OFFSET=17,
    IX_RESERVED18_OFFSET=18,
    IX_TOTAL_SIZE=19
};
377 :
378 : int32_t
379 0 : swapFormatVersion4(const UDataSwapper *ds,
380 : const void *inData, int32_t length, void *outData,
381 : UErrorCode &errorCode) {
382 0 : if(U_FAILURE(errorCode)) { return 0; }
383 :
384 0 : const uint8_t *inBytes=(const uint8_t *)inData;
385 0 : uint8_t *outBytes=(uint8_t *)outData;
386 :
387 0 : const int32_t *inIndexes=(const int32_t *)inBytes;
388 : int32_t indexes[IX_TOTAL_SIZE+1];
389 :
390 : // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
391 0 : if(0<=length && length<8) {
392 : udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
393 : "(%d after header) for collation data\n",
394 0 : length);
395 0 : errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
396 0 : return 0;
397 : }
398 :
399 0 : int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
400 0 : if(0<=length && length<(indexesLength*4)) {
401 : udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
402 : "(%d after header) for collation data\n",
403 0 : length);
404 0 : errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
405 0 : return 0;
406 : }
407 :
408 0 : for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
409 0 : indexes[i]=udata_readInt32(ds, inIndexes[i]);
410 : }
411 0 : for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
412 0 : indexes[i]=-1;
413 : }
414 0 : inIndexes=NULL; // Make sure we do not accidentally use these instead of indexes[].
415 :
416 : // Get the total length of the data.
417 : int32_t size;
418 0 : if(indexesLength>IX_TOTAL_SIZE) {
419 0 : size=indexes[IX_TOTAL_SIZE];
420 0 : } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
421 0 : size=indexes[indexesLength-1];
422 : } else {
423 0 : size=indexesLength*4;
424 : }
425 0 : if(length<0) { return size; }
426 :
427 0 : if(length<size) {
428 : udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
429 : "(%d after header) for collation data\n",
430 0 : length);
431 0 : errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
432 0 : return 0;
433 : }
434 :
435 : // Copy the data for inaccessible bytes and arrays of bytes.
436 0 : if(inBytes!=outBytes) {
437 0 : uprv_memcpy(outBytes, inBytes, size);
438 : }
439 :
440 : // Swap the int32_t indexes[].
441 0 : ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
442 :
443 : // The following is a modified version of CollationDataReader::read().
444 : // Here we use indexes[] not inIndexes[] because
445 : // the inIndexes[] may not be in this machine's endianness.
446 : int32_t index; // one of the indexes[] slots
447 : int32_t offset; // byte offset for the index part
448 : // int32_t length; // number of bytes in the index part
449 :
450 0 : index = IX_REORDER_CODES_OFFSET;
451 0 : offset = indexes[index];
452 0 : length = indexes[index + 1] - offset;
453 0 : if(length > 0) {
454 0 : ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
455 : }
456 :
457 : // Skip the IX_REORDER_TABLE_OFFSET byte array.
458 :
459 0 : index = IX_TRIE_OFFSET;
460 0 : offset = indexes[index];
461 0 : length = indexes[index + 1] - offset;
462 0 : if(length > 0) {
463 0 : utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode);
464 : }
465 :
466 0 : index = IX_RESERVED8_OFFSET;
467 0 : offset = indexes[index];
468 0 : length = indexes[index + 1] - offset;
469 0 : if(length > 0) {
470 0 : udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length);
471 0 : errorCode = U_UNSUPPORTED_ERROR;
472 0 : return 0;
473 : }
474 :
475 0 : index = IX_CES_OFFSET;
476 0 : offset = indexes[index];
477 0 : length = indexes[index + 1] - offset;
478 0 : if(length > 0) {
479 0 : ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode);
480 : }
481 :
482 0 : index = IX_RESERVED10_OFFSET;
483 0 : offset = indexes[index];
484 0 : length = indexes[index + 1] - offset;
485 0 : if(length > 0) {
486 0 : udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length);
487 0 : errorCode = U_UNSUPPORTED_ERROR;
488 0 : return 0;
489 : }
490 :
491 0 : index = IX_CE32S_OFFSET;
492 0 : offset = indexes[index];
493 0 : length = indexes[index + 1] - offset;
494 0 : if(length > 0) {
495 0 : ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
496 : }
497 :
498 0 : index = IX_ROOT_ELEMENTS_OFFSET;
499 0 : offset = indexes[index];
500 0 : length = indexes[index + 1] - offset;
501 0 : if(length > 0) {
502 0 : ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
503 : }
504 :
505 0 : index = IX_CONTEXTS_OFFSET;
506 0 : offset = indexes[index];
507 0 : length = indexes[index + 1] - offset;
508 0 : if(length > 0) {
509 0 : ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
510 : }
511 :
512 0 : index = IX_UNSAFE_BWD_OFFSET;
513 0 : offset = indexes[index];
514 0 : length = indexes[index + 1] - offset;
515 0 : if(length > 0) {
516 0 : ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
517 : }
518 :
519 0 : index = IX_FAST_LATIN_TABLE_OFFSET;
520 0 : offset = indexes[index];
521 0 : length = indexes[index + 1] - offset;
522 0 : if(length > 0) {
523 0 : ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
524 : }
525 :
526 0 : index = IX_SCRIPTS_OFFSET;
527 0 : offset = indexes[index];
528 0 : length = indexes[index + 1] - offset;
529 0 : if(length > 0) {
530 0 : ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
531 : }
532 :
533 : // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
534 :
535 0 : index = IX_RESERVED18_OFFSET;
536 0 : offset = indexes[index];
537 0 : length = indexes[index + 1] - offset;
538 0 : if(length > 0) {
539 0 : udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length);
540 0 : errorCode = U_UNSUPPORTED_ERROR;
541 0 : return 0;
542 : }
543 :
544 0 : return size;
545 : }
546 :
547 : } // namespace
548 :
549 : /* swap ICU collation data like ucadata.icu */
550 : U_CAPI int32_t U_EXPORT2
551 0 : ucol_swap(const UDataSwapper *ds,
552 : const void *inData, int32_t length, void *outData,
553 : UErrorCode *pErrorCode) {
554 0 : if(U_FAILURE(*pErrorCode)) { return 0; }
555 :
556 : /* udata_swapDataHeader checks the arguments */
557 0 : int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
558 0 : if(U_FAILURE(*pErrorCode)) {
559 : // Try to swap the old format version which did not have a standard data header.
560 0 : *pErrorCode=U_ZERO_ERROR;
561 0 : return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
562 : }
563 :
564 : /* check data format and format version */
565 0 : const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
566 0 : if(!(
567 0 : info.dataFormat[0]==0x55 && // dataFormat="UCol"
568 0 : info.dataFormat[1]==0x43 &&
569 0 : info.dataFormat[2]==0x6f &&
570 0 : info.dataFormat[3]==0x6c &&
571 0 : (3<=info.formatVersion[0] && info.formatVersion[0]<=5)
572 : )) {
573 0 : udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
574 : "(format version %02x.%02x) is not recognized as collation data\n",
575 0 : info.dataFormat[0], info.dataFormat[1],
576 0 : info.dataFormat[2], info.dataFormat[3],
577 0 : info.formatVersion[0], info.formatVersion[1]);
578 0 : *pErrorCode=U_UNSUPPORTED_ERROR;
579 0 : return 0;
580 : }
581 :
582 0 : inData=(const char *)inData+headerSize;
583 0 : if(length>=0) { length-=headerSize; }
584 0 : outData=(char *)outData+headerSize;
585 : int32_t collationSize;
586 0 : if(info.formatVersion[0]>=4) {
587 0 : collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);
588 : } else {
589 0 : collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);
590 : }
591 0 : if(U_SUCCESS(*pErrorCode)) {
592 0 : return headerSize+collationSize;
593 : } else {
594 0 : return 0;
595 : }
596 : }
597 :
598 : /* swap inverse UCA collation data (invuca.icu) */
599 : U_CAPI int32_t U_EXPORT2
600 0 : ucol_swapInverseUCA(const UDataSwapper *ds,
601 : const void *inData, int32_t length, void *outData,
602 : UErrorCode *pErrorCode) {
603 : const UDataInfo *pInfo;
604 : int32_t headerSize;
605 :
606 : const uint8_t *inBytes;
607 : uint8_t *outBytes;
608 :
609 : const InverseUCATableHeader *inHeader;
610 : InverseUCATableHeader *outHeader;
611 0 : InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
612 :
613 : /* udata_swapDataHeader checks the arguments */
614 0 : headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
615 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
616 0 : return 0;
617 : }
618 :
619 : /* check data format and format version */
620 0 : pInfo=(const UDataInfo *)((const char *)inData+4);
621 0 : if(!(
622 0 : pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */
623 0 : pInfo->dataFormat[1]==0x6e &&
624 0 : pInfo->dataFormat[2]==0x76 &&
625 0 : pInfo->dataFormat[3]==0x43 &&
626 0 : pInfo->formatVersion[0]==2 &&
627 0 : pInfo->formatVersion[1]>=1
628 : )) {
629 0 : udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
630 0 : pInfo->dataFormat[0], pInfo->dataFormat[1],
631 0 : pInfo->dataFormat[2], pInfo->dataFormat[3],
632 0 : pInfo->formatVersion[0], pInfo->formatVersion[1]);
633 0 : *pErrorCode=U_UNSUPPORTED_ERROR;
634 0 : return 0;
635 : }
636 :
637 0 : inBytes=(const uint8_t *)inData+headerSize;
638 0 : outBytes=(uint8_t *)outData+headerSize;
639 :
640 0 : inHeader=(const InverseUCATableHeader *)inBytes;
641 0 : outHeader=(InverseUCATableHeader *)outBytes;
642 :
643 : /*
644 : * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
645 : * starting with its size field.
646 : * sizeof(UCATableHeader)==8*4 in ICU 2.8
647 : * check the length against the header size before reading the size field
648 : */
649 0 : if(length<0) {
650 0 : header.byteSize=udata_readInt32(ds, inHeader->byteSize);
651 0 : } else if(
652 0 : ((length-headerSize)<(8*4) ||
653 0 : (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
654 : ) {
655 : udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
656 0 : length);
657 0 : *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
658 0 : return 0;
659 : }
660 :
661 0 : if(length>=0) {
662 : /* copy everything, takes care of data that needs no swapping */
663 0 : if(inBytes!=outBytes) {
664 0 : uprv_memcpy(outBytes, inBytes, header.byteSize);
665 : }
666 :
667 : /* swap the necessary pieces in the order of their occurrence in the data */
668 :
669 : /* read more of the InverseUCATableHeader (the byteSize field was read above) */
670 0 : header.tableSize= ds->readUInt32(inHeader->tableSize);
671 0 : header.contsSize= ds->readUInt32(inHeader->contsSize);
672 0 : header.table= ds->readUInt32(inHeader->table);
673 0 : header.conts= ds->readUInt32(inHeader->conts);
674 :
675 : /* swap the 32-bit integers in the header */
676 0 : ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
677 :
678 : /* swap the inverse table; tableSize counts uint32_t[3] rows */
679 0 : ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
680 0 : outBytes+header.table, pErrorCode);
681 :
682 : /* swap the continuation table; contsSize counts UChars */
683 0 : ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
684 0 : outBytes+header.conts, pErrorCode);
685 : }
686 :
687 0 : return headerSize+header.byteSize;
688 : }
689 :
690 : #endif /* #if !UCONFIG_NO_COLLATION */
|