LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucmndata.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 85 137 62.0 %
Date: 2017-07-14 16:53:18 Functions: 7 10 70.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 1999-2011, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************/
      10             : 
      11             : 
      12             : /*------------------------------------------------------------------------------
      13             :  *
      14             :  *   UCommonData   An abstract interface for dealing with ICU Common Data Files.
      15             :  *                 ICU Common Data Files are a grouping of a number of individual
      16             :  *                 data items (resources, converters, tables, anything) into a
      17             :  *                 single file or dll.  The combined format includes a table of
      18             :  *                 contents for locating the individual items by name.
      19             :  *
      20             :  *                 Two formats for the table of contents are supported, which is
      21             :  *                 why there is an abstract interface involved.
      22             :  *
      23             :  */
      24             : 
      25             : #include "unicode/utypes.h"
      26             : #include "unicode/udata.h"
      27             : #include "cstring.h"
      28             : #include "ucmndata.h"
      29             : #include "udatamem.h"
      30             : 
      31             : #if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP)
      32             : #   include <stdio.h>
      33             : #endif
      34             : 
      35             : U_CFUNC uint16_t
      36          13 : udata_getHeaderSize(const DataHeader *udh) {
      37          13 :     if(udh==NULL) {
      38           0 :         return 0;
      39          13 :     } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) {
      40             :         /* same endianness */
      41          13 :         return udh->dataHeader.headerSize;
      42             :     } else {
      43             :         /* opposite endianness */
      44           0 :         uint16_t x=udh->dataHeader.headerSize;
      45           0 :         return (uint16_t)((x<<8)|(x>>8));
      46             :     }
      47             : }
      48             : 
      49             : U_CFUNC uint16_t
      50           0 : udata_getInfoSize(const UDataInfo *info) {
      51           0 :     if(info==NULL) {
      52           0 :         return 0;
      53           0 :     } else if(info->isBigEndian==U_IS_BIG_ENDIAN) {
      54             :         /* same endianness */
      55           0 :         return info->size;
      56             :     } else {
      57             :         /* opposite endianness */
      58           0 :         uint16_t x=info->size;
      59           0 :         return (uint16_t)((x<<8)|(x>>8));
      60             :     }
      61             : }
      62             : 
      63             : /*-----------------------------------------------------------------------------*
      64             :  *                                                                             *
      65             :  *  Pointer TOCs.   TODO: This form of table-of-contents should be removed     *
      66             :  *                  because DLLs must be relocated on loading to correct the   *
      67             :  *                  pointer values and this operation makes shared memory      *
      68             :  *                  mapping of the data much less likely to work.              *
      69             :  *                                                                             *
      70             :  *-----------------------------------------------------------------------------*/
      71             : typedef struct {
      72             :     const char       *entryName;
      73             :     const DataHeader *pHeader;
      74             : } PointerTOCEntry;
      75             : 
      76             : 
      77             : typedef struct  {
      78             :     uint32_t          count;
      79             :     uint32_t          reserved;
      80             :     PointerTOCEntry   entry[2];   /* Actual size is from count. */
      81             : }  PointerTOC;
      82             : 
      83             : 
      84             : /* definition of OffsetTOC struct types moved to ucmndata.h */
      85             : 
      86             : /*-----------------------------------------------------------------------------*
      87             :  *                                                                             *
      88             :  *    entry point lookup implementations                                       *
      89             :  *                                                                             *
      90             :  *-----------------------------------------------------------------------------*/
      91             : 
      92             : #ifndef MIN
      93             : #define MIN(a,b) (((a)<(b)) ? (a) : (b))
      94             : #endif
      95             : 
      96             : /**
      97             :  * Compare strings where we know the shared prefix length,
      98             :  * and advance the prefix length as we find that the strings share even more characters.
      99             :  */
     100             : static int32_t
     101          87 : strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) {
     102          87 :     int32_t pl=*pPrefixLength;
     103          87 :     int32_t cmp=0;
     104          87 :     s1+=pl;
     105          87 :     s2+=pl;
     106             :     for(;;) {
     107         306 :         int32_t c1=(uint8_t)*s1++;
     108         306 :         int32_t c2=(uint8_t)*s2++;
     109         306 :         cmp=c1-c2;
     110         306 :         if(cmp!=0 || c1==0) {  /* different or done */
     111             :             break;
     112             :         }
     113         219 :         ++pl;  /* increment shared same-prefix length */
     114         219 :     }
     115          87 :     *pPrefixLength=pl;
     116          87 :     return cmp;
     117             : }
     118             : 
     119             : static int32_t
     120           7 : offsetTOCPrefixBinarySearch(const char *s, const char *names,
     121             :                             const UDataOffsetTOCEntry *toc, int32_t count) {
     122           7 :     int32_t start=0;
     123           7 :     int32_t limit=count;
     124             :     /*
     125             :      * Remember the shared prefix between s, start and limit,
     126             :      * and don't compare that shared prefix again.
     127             :      * The shared prefix should get longer as we narrow the [start, limit[ range.
     128             :      */
     129           7 :     int32_t startPrefixLength=0;
     130           7 :     int32_t limitPrefixLength=0;
     131           7 :     if(count==0) {
     132           0 :         return -1;
     133             :     }
     134             :     /*
     135             :      * Prime the prefix lengths so that we don't keep prefixLength at 0 until
     136             :      * both the start and limit indexes have moved.
     137             :      * At the same time, we check whether s matches the name at start or at (limit-1),
     138             :      * and if not, exclude them from the actual binary search.
     139             :      */
     140           7 :     if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) {
     141           0 :         return 0;
     142             :     }
     143           7 :     ++start;
     144           7 :     --limit;
     145           7 :     if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) {
     146           0 :         return limit;
     147             :     }
     148         139 :     while(start<limit) {
     149          73 :         int32_t i=(start+limit)/2;
     150          73 :         int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
     151          73 :         int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, &prefixLength);
     152          73 :         if(cmp<0) {
     153          34 :             limit=i;
     154          34 :             limitPrefixLength=prefixLength;
     155          39 :         } else if(cmp==0) {
     156           7 :             return i;
     157             :         } else {
     158          32 :             start=i+1;
     159          32 :             startPrefixLength=prefixLength;
     160             :         }
     161             :     }
     162           0 :     return -1;
     163             : }
     164             : 
     165             : static int32_t
     166           7 : pointerTOCPrefixBinarySearch(const char *s, const PointerTOCEntry *toc, int32_t count) {
     167           7 :     int32_t start=0;
     168           7 :     int32_t limit=count;
     169             :     /*
     170             :      * Remember the shared prefix between s, start and limit,
     171             :      * and don't compare that shared prefix again.
     172             :      * The shared prefix should get longer as we narrow the [start, limit[ range.
     173             :      */
     174           7 :     int32_t startPrefixLength=0;
     175           7 :     int32_t limitPrefixLength=0;
     176           7 :     if(count==0) {
     177           7 :         return -1;
     178             :     }
     179             :     /*
     180             :      * Prime the prefix lengths so that we don't keep prefixLength at 0 until
     181             :      * both the start and limit indexes have moved.
     182             :      * At the same time, we check whether s matches the name at start or at (limit-1),
     183             :      * and if not, exclude them from the actual binary search.
     184             :      */
     185           0 :     if(0==strcmpAfterPrefix(s, toc[0].entryName, &startPrefixLength)) {
     186           0 :         return 0;
     187             :     }
     188           0 :     ++start;
     189           0 :     --limit;
     190           0 :     if(0==strcmpAfterPrefix(s, toc[limit].entryName, &limitPrefixLength)) {
     191           0 :         return limit;
     192             :     }
     193           0 :     while(start<limit) {
     194           0 :         int32_t i=(start+limit)/2;
     195           0 :         int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
     196           0 :         int32_t cmp=strcmpAfterPrefix(s, toc[i].entryName, &prefixLength);
     197           0 :         if(cmp<0) {
     198           0 :             limit=i;
     199           0 :             limitPrefixLength=prefixLength;
     200           0 :         } else if(cmp==0) {
     201           0 :             return i;
     202             :         } else {
     203           0 :             start=i+1;
     204           0 :             startPrefixLength=prefixLength;
     205             :         }
     206             :     }
     207           0 :     return -1;
     208             : }
     209             : 
     210             : U_CDECL_BEGIN
     211             : static uint32_t U_CALLCONV
     212           0 : offsetTOCEntryCount(const UDataMemory *pData) {
     213           0 :     int32_t          retVal=0;
     214           0 :     const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc;
     215           0 :     if (toc != NULL) {
     216           0 :         retVal = toc->count;
     217             :     }
     218           0 :     return retVal;
     219             : }
     220             : 
     221             : static const DataHeader * U_CALLCONV
     222           7 : offsetTOCLookupFn(const UDataMemory *pData,
     223             :                   const char *tocEntryName,
     224             :                   int32_t *pLength,
     225             :                   UErrorCode *pErrorCode) {
     226             :     (void)pErrorCode;
     227           7 :     const UDataOffsetTOC  *toc = (UDataOffsetTOC *)pData->toc;
     228           7 :     if(toc!=NULL) {
     229           7 :         const char *base=(const char *)toc;
     230           7 :         int32_t number, count=(int32_t)toc->count;
     231             : 
     232             :         /* perform a binary search for the data in the common data's table of contents */
     233             : #if defined (UDATA_DEBUG_DUMP)
     234             :         /* list the contents of the TOC each time .. not recommended */
     235             :         for(number=0; number<count; ++number) {
     236             :             fprintf(stderr, "\tx%d: %s\n", number, &base[toc->entry[number].nameOffset]);
     237             :         }
     238             : #endif
     239           7 :         number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count);
     240           7 :         if(number>=0) {
     241             :             /* found it */
     242           7 :             const UDataOffsetTOCEntry *entry=toc->entry+number;
     243             : #ifdef UDATA_DEBUG
     244             :             fprintf(stderr, "%s: Found.\n", tocEntryName);
     245             : #endif
     246           7 :             if((number+1) < count) {
     247           7 :                 *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset);
     248             :             } else {
     249           0 :                 *pLength = -1;
     250             :             }
     251           7 :             return (const DataHeader *)(base+entry->dataOffset);
     252             :         } else {
     253             : #ifdef UDATA_DEBUG
     254             :             fprintf(stderr, "%s: Not found.\n", tocEntryName);
     255             : #endif
     256           0 :             return NULL;
     257             :         }
     258             :     } else {
     259             : #ifdef UDATA_DEBUG
     260             :         fprintf(stderr, "returning header\n");
     261             : #endif
     262             : 
     263           0 :         return pData->pHeader;
     264             :     }
     265             : }
     266             : 
     267             : 
     268           0 : static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) {
     269           0 :     const PointerTOC *toc = (PointerTOC *)pData->toc;
     270           0 :     return (uint32_t)((toc != NULL) ? (toc->count) : 0);
     271             : }
     272             : 
     273           7 : static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData,
     274             :                    const char *name,
     275             :                    int32_t *pLength,
     276             :                    UErrorCode *pErrorCode) {
     277             :     (void)pErrorCode;
     278           7 :     if(pData->toc!=NULL) {
     279           7 :         const PointerTOC *toc = (PointerTOC *)pData->toc;
     280           7 :         int32_t number, count=(int32_t)toc->count;
     281             : 
     282             : #if defined (UDATA_DEBUG_DUMP)
     283             :         /* list the contents of the TOC each time .. not recommended */
     284             :         for(number=0; number<count; ++number) {
     285             :             fprintf(stderr, "\tx%d: %s\n", number, toc->entry[number].entryName);
     286             :         }
     287             : #endif
     288           7 :         number=pointerTOCPrefixBinarySearch(name, toc->entry, count);
     289           7 :         if(number>=0) {
     290             :             /* found it */
     291             : #ifdef UDATA_DEBUG
     292             :             fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName);
     293             : #endif
     294           0 :             *pLength=-1;
     295           0 :             return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader);
     296             :         } else {
     297             : #ifdef UDATA_DEBUG
     298             :             fprintf(stderr, "%s: Not found.\n", name);
     299             : #endif
     300           7 :             return NULL;
     301             :         }
     302             :     } else {
     303           0 :         return pData->pHeader;
     304             :     }
     305             : }
     306             : U_CDECL_END
     307             : 
     308             : 
     309             : static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn,  offsetTOCEntryCount};
     310             : static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount};
     311             : 
     312             : 
     313             : 
     314             : /*----------------------------------------------------------------------*
     315             :  *                                                                      *
     316             :  *  checkCommonData   Validate the format of a common data file.        *
     317             :  *                    Fill in the virtual function ptr based on TOC type *
     318             :  *                    If the data is invalid, close the UDataMemory     *
     319             :  *                    and set the appropriate error code.               *
     320             :  *                                                                      *
     321             :  *----------------------------------------------------------------------*/
     322           6 : U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) {
     323           6 :     if (U_FAILURE(*err)) {
     324           0 :         return;
     325             :     }
     326             : 
     327           6 :     if(udm==NULL || udm->pHeader==NULL) {
     328           0 :       *err=U_INVALID_FORMAT_ERROR;
     329          18 :     } else if(!(udm->pHeader->dataHeader.magic1==0xda &&
     330          12 :         udm->pHeader->dataHeader.magic2==0x27 &&
     331           6 :         udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN &&
     332           6 :         udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY)
     333             :         ) {
     334             :         /* header not valid */
     335           0 :         *err=U_INVALID_FORMAT_ERROR;
     336             :     }
     337           9 :     else if (udm->pHeader->info.dataFormat[0]==0x43 &&
     338           6 :         udm->pHeader->info.dataFormat[1]==0x6d &&
     339           6 :         udm->pHeader->info.dataFormat[2]==0x6e &&
     340           6 :         udm->pHeader->info.dataFormat[3]==0x44 &&
     341           3 :         udm->pHeader->info.formatVersion[0]==1
     342             :         ) {
     343             :         /* dataFormat="CmnD" */
     344           3 :         udm->vFuncs = &CmnDFuncs;
     345           3 :         udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
     346             :     }
     347           6 :     else if(udm->pHeader->info.dataFormat[0]==0x54 &&
     348           6 :         udm->pHeader->info.dataFormat[1]==0x6f &&
     349           6 :         udm->pHeader->info.dataFormat[2]==0x43 &&
     350           6 :         udm->pHeader->info.dataFormat[3]==0x50 &&
     351           3 :         udm->pHeader->info.formatVersion[0]==1
     352             :         ) {
     353             :         /* dataFormat="ToCP" */
     354           3 :         udm->vFuncs = &ToCPFuncs;
     355           3 :         udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader);
     356             :     }
     357             :     else {
     358             :         /* dataFormat not recognized */
     359           0 :         *err=U_INVALID_FORMAT_ERROR;
     360             :     }
     361             : 
     362           6 :     if (U_FAILURE(*err)) {
     363             :         /* If the data is no good and we memory-mapped it ourselves,
     364             :          *  close the memory mapping so it doesn't leak.  Note that this has
     365             :          *  no effect on non-memory mapped data, other than clearing fields in udm.
     366             :          */
     367           0 :         udata_close(udm);
     368             :     }
     369             : }
     370             : 
     371             : /*
     372             :  * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package
     373             :  * header but not its sub-items.
     374             :  * This function will be needed for automatic runtime swapping.
     375             :  * Sub-items should not be swapped to limit the swapping to the parts of the
     376             :  * package that are actually used.
     377             :  *
     378             :  * Since lengths of items are implicit in the order and offsets of their
     379             :  * ToC entries, and since offsets are relative to the start of the ToC,
     380             :  * a swapped version may need to generate a different data structure
     381             :  * with pointers to the original data items and with their lengths
     382             :  * (-1 for the last one if it is not known), and maybe even pointers to the
     383             :  * swapped versions of the items.
     384             :  * These pointers to swapped versions would establish a cache;
     385             :  * instead, each open data item could simply own the storage for its swapped
     386             :  * data. This fits better with the current design.
     387             :  *
     388             :  * markus 2003sep18 Jitterbug 2235
     389             :  */

Generated by: LCOV version 1.13