LCOV - output.info - parser/expat/lib/xmltok.c

LCOV - code coverage report

Current view:	top level - parser/expat/lib - xmltok.c (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	102	452	22.6 %
Date:	2017-07-14 16:53:18	Functions:	9	37	24.3 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
       2             :    See the file COPYING for copying permission.
       3             : */
       4             : 
       5             : #include <stddef.h>
       6             : 
       7             : #ifdef COMPILED_FROM_DSP
       8             : #include "winconfig.h"
       9             : #elif defined(MACOS_CLASSIC)
      10             : #include "macconfig.h"
      11             : #elif defined(__amigaos4__)
      12             : #include "amigaconfig.h"
      13             : #else
      14             : #ifdef HAVE_EXPAT_CONFIG_H
      15             : #include <expat_config.h>
      16             : #endif
      17             : #endif /* ndef COMPILED_FROM_DSP */
      18             : 
      19             : #include "expat_external.h"
      20             : #include "internal.h"
      21             : #include "xmltok.h"
      22             : #include "nametab.h"
      23             : 
      24             : #ifdef XML_DTD /* DTD builds append an ignore-section scanner to the first list in VTABLE1 */
      25             : #define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
      26             : #else
      27             : #define IGNORE_SECTION_TOK_VTABLE /* expands to nothing, so VTABLE1's first list keeps three entries */
      28             : #endif
      29             : 
      30             : #define VTABLE1 /* initializer fragment: two brace-enclosed scanner lists, then nine PREFIX()-named helpers */ \
      31             :   { PREFIX(prologTok), PREFIX(contentTok), \
      32             :     PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
      33             :   { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
      34             :   PREFIX(sameName), \
      35             :   PREFIX(nameMatchesAscii), \
      36             :   PREFIX(nameLength), \
      37             :   PREFIX(skipS), \
      38             :   PREFIX(getAtts), \
      39             :   PREFIX(charRefNumber), \
      40             :   PREFIX(predefinedEntityName), \
      41             :   PREFIX(updatePosition), \
      42             :   PREFIX(isPublicId)
      43             : 
      44             : #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) /* VTABLE1 followed by the PREFIX()-named UTF-8 and UTF-16 converters */
      45             : 
      46             : #define UCS2_GET_NAMING(pages, hi, lo) /* nonzero when bit (lo & 0x1F) of word (pages[hi] << 3) + (lo >> 5) in namingBitmap is set; 1u keeps the bit-31 mask well defined */ \
      47             :    (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
      48             : 
      49             : /* A 2 byte UTF-8 representation splits the character's 11 bits between
      50             :    the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
      51             :    pages, 3 bits to add to that index and 5 bits to generate the mask.
      52             :    The mask is built from 1u so that selecting bit 31 is well defined. */
      53             : #define UTF8_GET_NAMING2(pages, byte) \
      54             :     (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
      55             :                       + ((((byte)[0]) & 3) << 1) \
      56             :                       + ((((byte)[1]) >> 5) & 1)] \
      57             :          & (1u << (((byte)[1]) & 0x1F)))
      58             : 
      59             : /* A 3 byte UTF-8 representation splits the character's 16 bits between
      60             :    the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
      61             :    into pages, 3 bits to add to that index and 5 bits to generate the
      62             :    mask.  The mask is built from 1u so that selecting bit 31 is well
      63             :    defined. */
      64             : #define UTF8_GET_NAMING3(pages, byte) \
      65             :   (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
      66             :                              + ((((byte)[1]) >> 2) & 0xF)] \
      67             :                        << 3) \
      68             :                       + ((((byte)[1]) & 3) << 1) \
      69             :                       + ((((byte)[2]) >> 5) & 1)] \
      70             :          & (1u << (((byte)[2]) & 0x1F)))
      71             : 
      72             : #define UTF8_GET_NAMING(pages, p, n) /* dispatch on sequence length n; lengths other than 2 and 3 yield 0 */ \
      73             :   ((n) == 2 \
      74             :   ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
      75             :   : ((n) == 3 \
      76             :      ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
      77             :      : 0))
      78             : 
      79             : /* Detection of invalid UTF-8 sequences is based on Table 3.1B
      80             :    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
      81             :    with the additional restriction of not allowing the Unicode
      82             :    code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
      83             :    Implementation details:
      84             :      (A & 0x80) == 0     means A < 0x80
      85             :    and
      86             :      (A & 0xC0) == 0xC0  means A > 0xBF
      87             : */
      88             : 
      89             : #define UTF8_INVALID2(p) /* nonzero when the lead byte is below 0xC2 or the second byte is outside 0x80..0xBF; NOTE(review): p is unparenthesized in (*p), so pass a cast or primary expression */ \
      90             :   ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
      91             : 
      92             : #define UTF8_INVALID3(p) /* nonzero for a malformed 3-byte sequence; NOTE(review): p is unparenthesized in (*p), so pass a cast or primary expression */ \
      93             :   (((p)[2] & 0x80) == 0 /* third byte below 0x80 */ \
      94             :   || \
      95             :   ((*p) == 0xEF && (p)[1] == 0xBF \
      96             :     ? \
      97             :     (p)[2] > 0xBD /* EF BF BE and EF BF BF (U+FFFE, U+FFFF) are rejected */ \
      98             :     : \
      99             :     ((p)[2] & 0xC0) == 0xC0) /* otherwise third byte above 0xBF */ \
     100             :   || \
     101             :   ((*p) == 0xE0 \
     102             :     ? \
     103             :     (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 /* after E0 the second byte must be 0xA0..0xBF */ \
     104             :     : \
     105             :     ((p)[1] & 0x80) == 0 \
     106             :     || \
     107             :     ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) /* after ED: 0x80..0x9F; any other lead: 0x80..0xBF */
     108             : 
     109             : #define UTF8_INVALID4(p) /* nonzero for a malformed 4-byte sequence; NOTE(review): p is unparenthesized in (*p), so pass a cast or primary expression */ \
     110             :   (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 /* fourth byte outside 0x80..0xBF */ \
     111             :   || \
     112             :   ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 /* third byte outside 0x80..0xBF */ \
     113             :   || \
     114             :   ((*p) == 0xF0 \
     115             :     ? \
     116             :     (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 /* after F0 the second byte must be 0x90..0xBF */ \
     117             :     : \
     118             :     ((p)[1] & 0x80) == 0 \
     119             :     || \
     120             :     ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) /* after F4: 0x80..0x8F; any other lead: 0x80..0xBF */
     121             : 
     122             : static int PTRFASTCALL /* predicate that ignores both arguments and always answers 0 */
     123           0 : isNever(const ENCODING *enc, const char *p)
     124             : {
     125           0 :   return 0; /* this file also uses it under the names utf8_isName4, utf8_isNmstrt4, sb_isNameMin and sb_isNmstrtMin */
     126             : }
     127             : 
     128             : static int PTRFASTCALL /* name-character test for the 2-byte UTF-8 sequence at p; enc is unused */
     129           0 : utf8_isName2(const ENCODING *enc, const char *p)
     130             : {
     131           0 :   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); /* nonzero when the namePages lookup has the character's bit set */
     132             : }
     133             : 
     134             : static int PTRFASTCALL /* name-character test for the 3-byte UTF-8 sequence at p; enc is unused */
     135           0 : utf8_isName3(const ENCODING *enc, const char *p)
     136             : {
     137           0 :   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); /* nonzero when the namePages lookup has the character's bit set */
     138             : }
     139             : 
     140             : #define utf8_isName4 isNever /* the 4-byte variant always reports "not a name character" */
     141             : 
     142             : static int PTRFASTCALL /* name-start test for the 2-byte UTF-8 sequence at p; enc is unused */
     143           0 : utf8_isNmstrt2(const ENCODING *enc, const char *p)
     144             : {
     145           0 :   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); /* same lookup as utf8_isName2 but through nmstrtPages */
     146             : }
     147             : 
     148             : static int PTRFASTCALL /* name-start test for the 3-byte UTF-8 sequence at p; enc is unused */
     149           0 : utf8_isNmstrt3(const ENCODING *enc, const char *p)
     150             : {
     151           0 :   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); /* same lookup as utf8_isName3 but through nmstrtPages */
     152             : }
     153             : 
     154             : #define utf8_isNmstrt4 isNever /* the 4-byte variant always reports "not a name-start character" */
     155             : 
     156             : static int PTRFASTCALL /* function form of UTF8_INVALID2: nonzero when the 2-byte sequence at p is malformed; enc is unused */
     157           0 : utf8_isInvalid2(const ENCODING *enc, const char *p)
     158             : {
     159           0 :   return UTF8_INVALID2((const unsigned char *)p); /* the cast makes the byte comparisons unsigned */
     160             : }
     161             : 
     162             : static int PTRFASTCALL /* function form of UTF8_INVALID3: nonzero when the 3-byte sequence at p is malformed; enc is unused */
     163           0 : utf8_isInvalid3(const ENCODING *enc, const char *p)
     164             : {
     165           0 :   return UTF8_INVALID3((const unsigned char *)p); /* the cast makes the byte comparisons unsigned */
     166             : }
     167             : 
     168             : static int PTRFASTCALL /* function form of UTF8_INVALID4: nonzero when the 4-byte sequence at p is malformed; enc is unused */
     169           0 : utf8_isInvalid4(const ENCODING *enc, const char *p)
     170             : {
     171           0 :   return UTF8_INVALID4((const unsigned char *)p); /* the cast makes the byte comparisons unsigned */
     172             : }
     173             : 
     174             : struct normal_encoding { /* an ENCODING extended with a per-byte type table and per-encoding character predicates */
     175             :   ENCODING enc; /* first member, so AS_NORMAL_ENCODING can cast an ENCODING pointer to this struct */
     176             :   unsigned char type[256]; /* byte type, indexed by the byte value taken as unsigned char */
     177             : #ifdef XML_MIN_SIZE /* the next five hooks exist only in XML_MIN_SIZE builds; STANDARD_VTABLE fills them */
     178             :   int (PTRFASTCALL *byteType)(const ENCODING *, const char *);
     179             :   int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
     180             :   int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
     181             :   int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
     182             :   int (PTRCALL *charMatches)(const ENCODING *, const char *, int);
     183             : #endif /* XML_MIN_SIZE */
     184             :   int (PTRFASTCALL *isName2)(const ENCODING *, const char *); /* isNameN: name-character test for an N-byte sequence */
     185             :   int (PTRFASTCALL *isName3)(const ENCODING *, const char *);
     186             :   int (PTRFASTCALL *isName4)(const ENCODING *, const char *);
     187             :   int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); /* isNmstrtN: name-start test for an N-byte sequence */
     188             :   int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
     189             :   int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
     190             :   int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); /* isInvalidN: nonzero when an N-byte sequence is malformed */
     191             :   int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
     192             :   int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
     193             : };
     194             : 
     195             : #define AS_NORMAL_ENCODING(enc)   ((const struct normal_encoding *) (enc)) /* view an ENCODING pointer as the normal_encoding that begins with it */
     196             : 
     197             : #ifdef XML_MIN_SIZE
     198             : 
     199             : #define STANDARD_VTABLE(E) /* initializers for the five XML_MIN_SIZE-only members, named with prefix E; ends with a comma */ \
     200             :  E ## byteType, \
     201             :  E ## isNameMin, \
     202             :  E ## isNmstrtMin, \
     203             :  E ## byteToAscii, \
     204             :  E ## charMatches,
     205             : 
     206             : #else
     207             : 
     208             : #define STANDARD_VTABLE(E) /* expands to nothing: those members do not exist without XML_MIN_SIZE */
     209             : 
     210             : #endif
     211             : 
     212             : #define NORMAL_VTABLE(E) /* initializers for the nine isNameN / isNmstrtN / isInvalidN members, named with prefix E */ \
     213             :  E ## isName2, \
     214             :  E ## isName3, \
     215             :  E ## isName4, \
     216             :  E ## isNmstrt2, \
     217             :  E ## isNmstrt3, \
     218             :  E ## isNmstrt4, \
     219             :  E ## isInvalid2, \
     220             :  E ## isInvalid3, \
     221             :  E ## isInvalid4
     222             : 
     223             : static int FASTCALL checkCharRefNumber(int);
     224             : 
     225             : #include "xmltok_impl.h"
     226             : #include "ascii.h"
     227             : 
     228             : #ifdef XML_MIN_SIZE
     229             : #define sb_isNameMin isNever /* STANDARD_VTABLE(sb_) names these two; both always return 0 */
     230             : #define sb_isNmstrtMin isNever
     231             : #endif
     232             : 
     233             : #ifdef XML_MIN_SIZE
     234             : #define MINBPC(enc) ((enc)->minBytesPerChar) /* read from the encoding object at run time */
     235             : #else
     236             : /* minimum bytes per character: the constant 1 for the normal_ instantiation */
     237             : #define MINBPC(enc) 1
     238             : #endif
     239             : 
     240             : #define SB_BYTE_TYPE(enc, p) /* byte type of the single byte at p, looked up in the encoding's type[] table */ \
     241             :   (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
     242             : 
     243             : #ifdef XML_MIN_SIZE
     244             : static int PTRFASTCALL /* function form of SB_BYTE_TYPE, named by STANDARD_VTABLE(sb_) */
     245             : sb_byteType(const ENCODING *enc, const char *p)
     246             : {
     247             :   return SB_BYTE_TYPE(enc, p);
     248             : }
     249             : #define BYTE_TYPE(enc, p) /* XML_MIN_SIZE: go through the byteType pointer stored in the encoding */ \
     250             :  (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
     251             : #else
     252             : #define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) /* otherwise: direct table lookup */
     253             : #endif
     254             : 
     255             : #ifdef XML_MIN_SIZE
     256             : #define BYTE_TO_ASCII(enc, p) /* XML_MIN_SIZE: go through the byteToAscii pointer stored in the encoding */ \
     257             :  (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
     258             : static int PTRFASTCALL /* single-byte version: the byte at p is returned unchanged; enc is unused */
     259             : sb_byteToAscii(const ENCODING *enc, const char *p)
     260             : {
     261             :   return *p;
     262             : }
     263             : #else
     264             : #define BYTE_TO_ASCII(enc, p) (*(p)) /* otherwise: the byte itself */
     265             : #endif
     266             : 
     267             : #define IS_NAME_CHAR(enc, p, n) /* call the encoding's isName<n> predicate; n is pasted into the member name */ \
     268             :  (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))
     269             : #define IS_NMSTRT_CHAR(enc, p, n) /* call the encoding's isNmstrt<n> predicate */ \
     270             :  (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))
     271             : #define IS_INVALID_CHAR(enc, p, n) /* call the encoding's isInvalid<n> predicate */ \
     272             :  (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))
     273             : 
     274             : #ifdef XML_MIN_SIZE
     275             : #define IS_NAME_CHAR_MINBPC(enc, p) /* XML_MIN_SIZE: go through the isNameMin pointer */ \
     276             :  (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
     277             : #define IS_NMSTRT_CHAR_MINBPC(enc, p) /* XML_MIN_SIZE: go through the isNmstrtMin pointer */ \
     278             :  (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
     279             : #else
     280             : #define IS_NAME_CHAR_MINBPC(enc, p) (0) /* constant 0 for the normal_ instantiation */
     281             : #define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) /* constant 0 for the normal_ instantiation */
     282             : #endif
     283             : 
     284             : #ifdef XML_MIN_SIZE
     285             : #define CHAR_MATCHES(enc, p, c) /* XML_MIN_SIZE: go through the charMatches pointer stored in the encoding */ \
     286             :  (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
     287             : static int PTRCALL /* single-byte version: true when the byte at p equals c; enc is unused */
     288             : sb_charMatches(const ENCODING *enc, const char *p, int c)
     289             : {
     290             :   return *p == c;
     291             : }
     292             : #else
     293             : /* c is an ASCII character; it is not parenthesized in the expansion below */
     294             : #define CHAR_MATCHES(enc, p, c) (*(p) == c)
     295             : #endif
     296             : 
     297             : #define PREFIX(ident) normal_ ## ident /* names produced through PREFIX() now start with normal_ */
     298             : #include "xmltok_impl.c" /* presumably instantiates the shared tokenizer code with the macros defined above — confirm against xmltok_impl.c */
     299             : 
     300             : #undef MINBPC /* drop the normal_ parameter macros so they can be defined again; PREFIX itself stays defined here */
     301             : #undef BYTE_TYPE
     302             : #undef BYTE_TO_ASCII
     303             : #undef CHAR_MATCHES
     304             : #undef IS_NAME_CHAR
     305             : #undef IS_NAME_CHAR_MINBPC
     306             : #undef IS_NMSTRT_CHAR
     307             : #undef IS_NMSTRT_CHAR_MINBPC
     308             : #undef IS_INVALID_CHAR
     309             : 
     310             : enum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
     311             :   UTF8_cval1 = 0x00, /* 0xxxxxxx */
     312             :   UTF8_cval2 = 0xc0, /* 110xxxxx */
     313             :   UTF8_cval3 = 0xe0, /* 1110xxxx */
     314             :   UTF8_cval4 = 0xf0 /* 11110xxx */
     315             : };
     316             : 
     317             : static void PTRCALL
     318           0 : utf8_toUtf8(const ENCODING *enc,
     319             :             const char **fromP, const char *fromLim,
     320             :             char **toP, const char *toLim)
     321             : {
     322             :   char *to;
     323             :   const char *from;
     324           0 :   if (fromLim - *fromP > toLim - *toP) {
     325             :     /* Avoid copying partial characters. */
     326           0 :     for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
     327           0 :       if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
     328           0 :         break;
     329             :   }
     330           0 :   for (to = *toP, from = *fromP; from != fromLim; from++, to++)
     331           0 :     *to = *from;
     332           0 :   *fromP = from;
     333           0 :   *toP = to;
     334           0 : }
     335             : 
     336             : static void PTRCALL /* decode UTF-8 at *fromP into UTF-16 code units at *toP until input or output runs out; both pointers are advanced */
     337           0 : utf8_toUtf16(const ENCODING *enc,
     338             :              const char **fromP, const char *fromLim,
     339             :              unsigned short **toP, const unsigned short *toLim)
     340             : {
     341           0 :   unsigned short *to = *toP;
     342           0 :   const char *from = *fromP;
     343           0 :   while (from != fromLim && to != toLim) {
     344           0 :     switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { /* classify the first byte through the encoding's type table */
     345             :     case BT_LEAD2: /* 5 + 6 payload bits; NOTE(review): from[1] is read without a fromLim check — presumably callers pass only complete sequences; confirm */
     346           0 :       *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
     347           0 :       from += 2;
     348           0 :       break;
     349             :     case BT_LEAD3: /* 4 + 6 + 6 payload bits; the same unchecked-read caveat applies to from[1] and from[2] */
     350           0 :       *to++ = (unsigned short)(((from[0] & 0xf) << 12)
     351           0 :                                | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));
     352           0 :       from += 3;
     353           0 :       break;
     354             :     case BT_LEAD4: /* 3 + 6 + 6 + 6 payload bits, written as a surrogate pair */
     355             :       {
     356             :         unsigned long n;
     357           0 :         if (to + 1 == toLim) /* only one output unit left: stop without consuming this character */
     358           0 :           goto after;
     359           0 :         n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
     360           0 :             | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
     361           0 :         n -= 0x10000; /* offset into the range above U+FFFF */
     362           0 :         to[0] = (unsigned short)((n >> 10) | 0xD800); /* high surrogate: bits above the low 10 */
     363           0 :         to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); /* low surrogate: low 10 bits */
     364           0 :         to += 2;
     365           0 :         from += 4;
     366             :       }
     367           0 :       break;
     368             :     default: /* every other byte type is copied as one unit; NOTE(review): where plain char is signed, a byte above 0x7F would be sign-extended here */
     369           0 :       *to++ = *from++;
     370           0 :       break;
     371             :     }
     372             :   }
     373             : after:
     374           0 :   *fromP = from;
     375           0 :   *toP = to;
     376           0 : }
     377             : 
     378             : #ifdef XML_NS
     379             : static const struct normal_encoding utf8_encoding_ns = { /* UTF-8 table for XML_NS builds: asciitab.h is included without remapping BT_COLON */
     380             :   { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     381             :   {
     382             : #include "asciitab.h"
     383             : #include "utf8tab.h"
     384             :   },
     385             :   STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) /* sb_ hooks (XML_MIN_SIZE only) followed by the utf8_ predicates */
     386             : };
     387             : #endif
     388             : 
     389             : static const struct normal_encoding utf8_encoding = { /* UTF-8 table whose ASCII half is expanded with BT_COLON remapped */
     390             :   { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     391             :   {
     392             : #define BT_COLON BT_NMSTRT /* while asciitab.h is expanded, every BT_COLON entry becomes BT_NMSTRT */
     393             : #include "asciitab.h"
     394             : #undef BT_COLON
     395             : #include "utf8tab.h"
     396             :   },
     397             :   STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) /* sb_ hooks (XML_MIN_SIZE only) followed by the utf8_ predicates */
     398             : };
     399             : 
     400             : #ifdef XML_NS
     401             : 
     402             : static const struct normal_encoding internal_utf8_encoding_ns = { /* like utf8_encoding_ns, but the first half of type[] comes from iasciitab.h */
     403             :   { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     404             :   {
     405             : #include "iasciitab.h"
     406             : #include "utf8tab.h"
     407             :   },
     408             :   STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) /* sb_ hooks (XML_MIN_SIZE only) followed by the utf8_ predicates */
     409             : };
     410             : 
     411             : #endif
     412             : 
     413             : static const struct normal_encoding internal_utf8_encoding = { /* like utf8_encoding, but the first half of type[] comes from iasciitab.h */
     414             :   { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     415             :   {
     416             : #define BT_COLON BT_NMSTRT /* while iasciitab.h is expanded, every BT_COLON entry becomes BT_NMSTRT */
     417             : #include "iasciitab.h"
     418             : #undef BT_COLON
     419             : #include "utf8tab.h"
     420             :   },
     421             :   STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) /* sb_ hooks (XML_MIN_SIZE only) followed by the utf8_ predicates */
     422             : };
     423             : 
     424             : static void PTRCALL /* re-encode single-byte input at *fromP as UTF-8 at *toP, advancing both pointers; enc is unused */
     425           0 : latin1_toUtf8(const ENCODING *enc,
     426             :               const char **fromP, const char *fromLim,
     427             :               char **toP, const char *toLim)
     428             : {
     429           0 :   for (;;) {
     430             :     unsigned char c;
     431           0 :     if (*fromP == fromLim) /* input exhausted */
     432           0 :       break;
     433           0 :     c = (unsigned char)**fromP;
     434           0 :     if (c & 0x80) { /* 0x80..0xFF becomes a two-byte sequence */
     435           0 :       if (toLim - *toP < 2) /* not enough room for both bytes: stop before consuming c */
     436           0 :         break;
     437           0 :       *(*toP)++ = (char)((c >> 6) | UTF8_cval2); /* lead byte carries the top two bits */
     438           0 :       *(*toP)++ = (char)((c & 0x3f) | 0x80); /* trail byte carries the low six bits */
     439           0 :       (*fromP)++;
     440             :     }
     441             :     else { /* below 0x80: copied as is */
     442           0 :       if (*toP == toLim)
     443           0 :         break;
     444           0 :       *(*toP)++ = *(*fromP)++;
     445             :     }
     446             :   }
     447           0 : }
     448             : 
     449             : static void PTRCALL /* widen each input byte to one UTF-16 unit until input or output runs out; enc is unused */
     450           0 : latin1_toUtf16(const ENCODING *enc,
     451             :                const char **fromP, const char *fromLim,
     452             :                unsigned short **toP, const unsigned short *toLim)
     453             : {
     454           0 :   while (*fromP != fromLim && *toP != toLim)
     455           0 :     *(*toP)++ = (unsigned char)*(*fromP)++; /* the unsigned char cast keeps bytes above 0x7F from being sign-extended */
     456           0 : }
     457             : 
     458             : #ifdef XML_NS
     459             : 
     460             : static const struct normal_encoding latin1_encoding_ns = { /* Latin-1 table for XML_NS builds: asciitab.h is included without remapping BT_COLON */
     461             :   { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     462             :   {
     463             : #include "asciitab.h"
     464             : #include "latin1tab.h"
     465             :   },
     466             :   STANDARD_VTABLE(sb_) /* no NORMAL_VTABLE: the isName/isNmstrt/isInvalid pointers are not listed and so are zero-initialized */
     467             : };
     468             : 
     469             : #endif
     470             : 
     471             : static const struct normal_encoding latin1_encoding = { /* Latin-1 table whose ASCII half is expanded with BT_COLON remapped */
     472             :   { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     473             :   {
     474             : #define BT_COLON BT_NMSTRT /* while asciitab.h is expanded, every BT_COLON entry becomes BT_NMSTRT */
     475             : #include "asciitab.h"
     476             : #undef BT_COLON
     477             : #include "latin1tab.h"
     478             :   },
     479             :   STANDARD_VTABLE(sb_) /* no NORMAL_VTABLE: the isName/isNmstrt/isInvalid pointers are not listed and so are zero-initialized */
     480             : };
     481             : 
     482             : static void PTRCALL /* byte-for-byte copy from *fromP to *toP until input or output runs out; enc is unused */
     483           0 : ascii_toUtf8(const ENCODING *enc,
     484             :              const char **fromP, const char *fromLim,
     485             :              char **toP, const char *toLim)
     486             : {
     487           0 :   while (*fromP != fromLim && *toP != toLim)
     488           0 :     *(*toP)++ = *(*fromP)++; /* no re-encoding is done here */
     489           0 : }
     490             : 
     491             : #ifdef XML_NS
     492             : 
     493             : static const struct normal_encoding ascii_encoding_ns = { /* ASCII table for XML_NS builds; reuses latin1_toUtf16 as its UTF-16 converter */
     494             :   { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     495             :   {
     496             : #include "asciitab.h"
     497             : /* BT_NONXML == 0, so type[] entries not supplied by asciitab.h are zero-initialized to BT_NONXML */
     498             :   },
     499             :   STANDARD_VTABLE(sb_) /* no NORMAL_VTABLE: the isName/isNmstrt/isInvalid pointers are not listed and so are zero-initialized */
     500             : };
     501             : 
     502             : #endif
     503             : 
     504             : static const struct normal_encoding ascii_encoding = { /* ASCII table with BT_COLON remapped; reuses latin1_toUtf16 as its UTF-16 converter */
     505             :   { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, /* ENCODING part; the three numbers are presumably minBytesPerChar, isUtf8, isUtf16 — confirm in xmltok.h */
     506             :   {
     507             : #define BT_COLON BT_NMSTRT /* while asciitab.h is expanded, every BT_COLON entry becomes BT_NMSTRT */
     508             : #include "asciitab.h"
     509             : #undef BT_COLON
     510             : /* BT_NONXML == 0, so type[] entries not supplied by asciitab.h are zero-initialized to BT_NONXML */
     511             :   },
     512             :   STANDARD_VTABLE(sb_) /* no NORMAL_VTABLE: the isName/isNmstrt/isInvalid pointers are not listed and so are zero-initialized */
     513             : };
     514             : 
     515             : static int PTRFASTCALL /* byte type of a 16-bit unit given as its high and low bytes; the LITTLE2 macros call it only when the high byte is non-zero */
     516           0 : unicode_byte_type(char hi, char lo)
     517             : {
     518           0 :   switch ((unsigned char)hi) {
     519             :   case 0xD8: case 0xD9: case 0xDA: case 0xDB: /* first unit of a surrogate pair */
     520           0 :     return BT_LEAD4;
     521             :   case 0xDC: case 0xDD: case 0xDE: case 0xDF: /* second unit of a surrogate pair */
     522           0 :     return BT_TRAIL;
     523             :   case 0xFF:
     524           0 :     switch ((unsigned char)lo) {
     525             :     case 0xFF:
     526             :     case 0xFE: /* 0xFFFF and 0xFFFE are rejected */
     527           0 :       return BT_NONXML;
     528             :     }
     529           0 :     break;
     530             :   }
     531           0 :   return BT_NONASCII; /* every other unit */
     532             : }
     533             : 
     534             : #define DEFINE_UTF16_TO_UTF8(E) /* defines E##toUtf8: re-encode 2-byte units as UTF-8, using whichever GET_LO / GET_HI are in effect at the point of use */ \
     535             : static void  PTRCALL \
     536             : E ## toUtf8(const ENCODING *enc, \
     537             :             const char **fromP, const char *fromLim, \
     538             :             char **toP, const char *toLim) \
     539             : { \
     540             :   const char *from; \
     541             :   for (from = *fromP; from != fromLim; from += 2) { /* NOTE(review): relies on an even byte count, otherwise from steps past fromLim — confirm callers guarantee it */ \
     542             :     int plane; \
     543             :     unsigned char lo2; \
     544             :     unsigned char lo = GET_LO(from); \
     545             :     unsigned char hi = GET_HI(from); \
     546             :     switch (hi) { \
     547             :     case 0: \
     548             :       if (lo < 0x80) { /* below 0x80: one output byte */ \
     549             :         if (*toP == toLim) { \
     550             :           *fromP = from; /* output full: report how far the input was consumed */ \
     551             :           return; \
     552             :         } \
     553             :         *(*toP)++ = lo; \
     554             :         break; \
     555             :       } \
     556             :       /* fall through */ \
     557             :     case 0x1: case 0x2: case 0x3: \
     558             :     case 0x4: case 0x5: case 0x6: case 0x7: /* 0x0080..0x07FF: two output bytes */ \
     559             :       if (toLim -  *toP < 2) { \
     560             :         *fromP = from; \
     561             :         return; \
     562             :       } \
     563             :       *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
     564             :       *(*toP)++ = ((lo & 0x3f) | 0x80); \
     565             :       break; \
     566             :     default: /* any high byte not listed in another case: three output bytes */ \
     567             :       if (toLim -  *toP < 3)  { \
     568             :         *fromP = from; \
     569             :         return; \
     570             :       } \
     571             :       /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
     572             :       *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
     573             :       *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
     574             :       *(*toP)++ = ((lo & 0x3f) | 0x80); \
     575             :       break; \
     576             :     case 0xD8: case 0xD9: case 0xDA: case 0xDB: /* high surrogate: combined with the next unit into four output bytes */ \
     577             :       if (toLim -  *toP < 4) { \
     578             :         *fromP = from; \
     579             :         return; \
     580             :       } \
     581             :       plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; /* top four payload bits of the high surrogate, plus one */ \
     582             :       *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
     583             :       *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
     584             :       from += 2; /* move to the second unit; NOTE(review): no fromLim check before it is read — presumably input never ends on a high surrogate; confirm */ \
     585             :       lo2 = GET_LO(from); \
     586             :       *(*toP)++ = (((lo & 0x3) << 4) \
     587             :                    | ((GET_HI(from) & 0x3) << 2) \
     588             :                    | (lo2 >> 6) \
     589             :                    | 0x80); \
     590             :       *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
     591             :       break; \
     592             :     } \
     593             :   } \
     594             :   *fromP = from; \
     595             : }
     596             : 
     597             : #define DEFINE_UTF16_TO_UTF16(E) /* defines E##toUtf16: copy 2-byte units into unsigned short output, assembling each from GET_HI and GET_LO */ \
     598             : static void  PTRCALL \
     599             : E ## toUtf16(const ENCODING *enc, \
     600             :              const char **fromP, const char *fromLim, \
     601             :              unsigned short **toP, const unsigned short *toLim) \
     602             : { \
     603             :   /* Avoid copying first half only of surrogate */ \
     604             :   if (fromLim - *fromP > ((toLim - *toP) << 1) /* more input bytes than the output can hold */ \
     605             :       && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) /* high byte 0xD8..0xDF; NOTE(review): this inspects the last input unit, not the last unit that fits — confirm intent */ \
     606             :     fromLim -= 2; \
     607             :   for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
     608             :     *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
     609             : }
     610             : 
     611             : #define SET2(ptr, ch) /* little-endian store: low byte first */ \
     612             :   (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
     613             : #define GET_LO(ptr) ((unsigned char)(ptr)[0]) /* little-endian: the low byte is at offset 0 */
     614             : #define GET_HI(ptr) ((unsigned char)(ptr)[1]) /* little-endian: the high byte is at offset 1 */
     615             : 
     616         120 : DEFINE_UTF16_TO_UTF8(little2_) /* defines little2_toUtf8 */
     617         903 : DEFINE_UTF16_TO_UTF16(little2_) /* defines little2_toUtf16 */
     618             : 
     619             : #undef SET2
     620             : #undef GET_LO
     621             : #undef GET_HI
     622             : 
     623             : #define SET2(ptr, ch) /* big-endian store: high byte first */ \
     624             :   (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
     625             : #define GET_LO(ptr) ((unsigned char)(ptr)[1]) /* big-endian: the low byte is at offset 1 */
     626             : #define GET_HI(ptr) ((unsigned char)(ptr)[0]) /* big-endian: the high byte is at offset 0 */
     627             : 
     628           0 : DEFINE_UTF16_TO_UTF8(big2_) /* defines big2_toUtf8 */
     629           0 : DEFINE_UTF16_TO_UTF16(big2_) /* defines big2_toUtf16 */
     630             : 
     631             : #undef SET2
     632             : #undef GET_LO
     633             : #undef GET_HI
     634             : 
     635             : #define LITTLE2_BYTE_TYPE(enc, p) /* high byte zero: use the type[] table on the low byte; otherwise classify with unicode_byte_type */ \
     636             :  ((p)[1] == 0 \
     637             :   ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
     638             :   : unicode_byte_type((p)[1], (p)[0]))
     639             : #define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1) /* the low byte when the high byte is zero, else -1 */
     640             : #define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c) /* true when the unit is high byte zero, low byte c */
     641             : #define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) /* name-character lookup with p[1] as the page index and p[0] as the bit selector */ \
     642             :   UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
     643             : #define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) /* name-start lookup with p[1] as the page index and p[0] as the bit selector */ \
     644             :   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
     645             : 
     646             : #ifdef XML_MIN_SIZE
     647             : 
     648             : static int PTRFASTCALL /* XML_MIN_SIZE only: function form of LITTLE2_BYTE_TYPE */
     649             : little2_byteType(const ENCODING *enc, const char *p)
     650             : {
     651             :   return LITTLE2_BYTE_TYPE(enc, p);
     652             : }
     653             : 
     654             : static int PTRFASTCALL
     655             : little2_byteToAscii(const ENCODING *enc, const char *p)
     656             : {
     657             :   return LITTLE2_BYTE_TO_ASCII(enc, p);
     658             : }
     659             : 
     660             : static int PTRCALL
     661             : little2_charMatches(const ENCODING *enc, const char *p, int c)
     662             : {
     663             :   return LITTLE2_CHAR_MATCHES(enc, p, c);
     664             : }
     665             : 
     666             : static int PTRFASTCALL
     667             : little2_isNameMin(const ENCODING *enc, const char *p)
     668             : {
     669             :   return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
     670             : }
     671             : 
     672             : static int PTRFASTCALL
     673             : little2_isNmstrtMin(const ENCODING *enc, const char *p)
     674             : {
     675             :   return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
     676             : }
     677             : 
     678             : #undef VTABLE
     679             : #define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
     680             : 
     681             : #else /* not XML_MIN_SIZE */
     682             : 
     683             : #undef PREFIX
     684             : #define PREFIX(ident) little2_ ## ident
     685             : #define MINBPC(enc) 2
     686             : /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
     687             : #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
     688             : #define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
     689             : #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
     690             : #define IS_NAME_CHAR(enc, p, n) 0
     691             : #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
     692             : #define IS_NMSTRT_CHAR(enc, p, n) (0)
     693             : #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
     694             : 
     695             : #include "xmltok_impl.c"
     696             : 
     697             : #undef MINBPC
     698             : #undef BYTE_TYPE
     699             : #undef BYTE_TO_ASCII
     700             : #undef CHAR_MATCHES
     701             : #undef IS_NAME_CHAR
     702             : #undef IS_NAME_CHAR_MINBPC
     703             : #undef IS_NMSTRT_CHAR
     704             : #undef IS_NMSTRT_CHAR_MINBPC
     705             : #undef IS_INVALID_CHAR
     706             : 
     707             : #endif /* not XML_MIN_SIZE */
     708             : 
     /* UTF-16 little-endian encoding object, compiled only when XML_NS is
        defined.  The first initializer group is VTABLE followed by the
        positional values 2, 0 and a flag that is 1 only when BYTEORDER == 1234.
        NOTE(review): the field names are declared outside this excerpt
        (presumably minBytesPerChar, isUtf8, isUtf16 -- confirm in xmltok.h).
        The second group is the byte-type table taken from asciitab.h and
        latin1tab.h; BT_COLON is not overridden here. */
     709             : #ifdef XML_NS
     710             : 
     711             : static const struct normal_encoding little2_encoding_ns = {
     712             :   { VTABLE, 2, 0,
     713             : #if BYTEORDER == 1234
     714             :     1
     715             : #else
     716             :     0
     717             : #endif
     718             :   },
     719             :   {
     720             : #include "asciitab.h"
     721             : #include "latin1tab.h"
     722             :   },
     723             :   STANDARD_VTABLE(little2_)
     724             : };
     725             : 
     726             : #endif
     727             : 
     /* UTF-16 little-endian encoding object that is always compiled.  Same
        layout as little2_encoding_ns, except that BT_COLON is defined as
        BT_NMSTRT while asciitab.h is included, so whatever entry asciitab.h
        tags BT_COLON (presumably ':') is classified as a name-start character.
        The trailing flag in the first group is 1 only when BYTEORDER == 1234. */
     728             : static const struct normal_encoding little2_encoding = {
     729             :   { VTABLE, 2, 0,
     730             : #if BYTEORDER == 1234
     731             :     1
     732             : #else
     733             :     0
     734             : #endif
     735             :   },
     736             :   {
     737             : #define BT_COLON BT_NMSTRT
     738             : #include "asciitab.h"
     739             : #undef BT_COLON
     740             : #include "latin1tab.h"
     741             :   },
     742             :   STANDARD_VTABLE(little2_)
     743             : };
     744             : 
     /* "Internal" UTF-16 little-endian encoding objects, compiled unless
        BYTEORDER == 4321.  They differ from the little2_encoding objects in
        two visible ways: the byte-type table comes from iasciitab.h instead of
        asciitab.h, and the trailing flag of the first group is always 1.
        NOTE(review): presumably used for data already in the parser's internal
        UTF-16 form -- confirm against the callers. */
     745             : #if BYTEORDER != 4321
     746             : 
     747             : #ifdef XML_NS
     748             : 
     /* Namespace-aware variant: BT_COLON keeps its own meaning. */
     749             : static const struct normal_encoding internal_little2_encoding_ns = {
     750             :   { VTABLE, 2, 0, 1 },
     751             :   {
     752             : #include "iasciitab.h"
     753             : #include "latin1tab.h"
     754             :   },
     755             :   STANDARD_VTABLE(little2_)
     756             : };
     757             : 
     758             : #endif
     759             : 
     /* Non-namespace variant: BT_COLON is mapped to BT_NMSTRT for the table. */
     760             : static const struct normal_encoding internal_little2_encoding = {
     761             :   { VTABLE, 2, 0, 1 },
     762             :   {
     763             : #define BT_COLON BT_NMSTRT
     764             : #include "iasciitab.h"
     765             : #undef BT_COLON
     766             : #include "latin1tab.h"
     767             :   },
     768             :   STANDARD_VTABLE(little2_)
     769             : };
     770             : 
     771             : #endif
     772             : 
     773             : 
     /* Accessors for one UTF-16 big-endian code unit at p, where p[0] is the
        high byte and p[1] the low byte.

        BIG2_BYTE_TYPE        byte type: table lookup on the low byte when the
                              high byte is 0, otherwise unicode_byte_type().
        BIG2_BYTE_TO_ASCII    the low byte when the high byte is 0, else -1.
        BIG2_CHAR_MATCHES     non-zero when the unit equals the character c.
        BIG2_IS_NAME_CHAR_MINBPC / BIG2_IS_NMSTRT_CHAR_MINBPC
                              naming-table lookups keyed by (high, low) byte.

        Every use of the p and c arguments is parenthesized so that expression
        arguments (e.g. `ptr + 2` or a conditional expression) keep their
        intended precedence after expansion. */
     #define BIG2_BYTE_TYPE(enc, p) \
      ((p)[0] == 0 \
       ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
       : unicode_byte_type((p)[0], (p)[1]))
     #define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
     #define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))
     #define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
       UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
     #define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
       UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
     784             : 
     /* Two build modes for the UTF-16 big-endian tokenizer:
        - XML_MIN_SIZE defined: the BIG2_* macros are wrapped in small functions
          and VTABLE is redefined as VTABLE1 plus the big2 converters.
        - otherwise: xmltok_impl.c is included with PREFIX(ident) expanding to
          big2_##ident and the generic BYTE_TYPE / CHAR_MATCHES / ... hooks
          mapped onto the BIG2_* macros. */
     785             : #ifdef XML_MIN_SIZE
     786             : 
     /* Function form of BIG2_BYTE_TYPE. */
     787             : static int PTRFASTCALL
     788             : big2_byteType(const ENCODING *enc, const char *p)
     789             : {
     790             :   return BIG2_BYTE_TYPE(enc, p);
     791             : }
     792             : 
     /* Function form of BIG2_BYTE_TO_ASCII (-1 when the high byte is set). */
     793             : static int PTRFASTCALL
     794             : big2_byteToAscii(const ENCODING *enc, const char *p)
     795             : {
     796             :   return BIG2_BYTE_TO_ASCII(enc, p);
     797             : }
     798             : 
     /* Function form of BIG2_CHAR_MATCHES. */
     799             : static int PTRCALL
     800             : big2_charMatches(const ENCODING *enc, const char *p, int c)
     801             : {
     802             :   return BIG2_CHAR_MATCHES(enc, p, c);
     803             : }
     804             : 
     /* Function form of BIG2_IS_NAME_CHAR_MINBPC. */
     805             : static int PTRFASTCALL
     806             : big2_isNameMin(const ENCODING *enc, const char *p)
     807             : {
     808             :   return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
     809             : }
     810             : 
     /* Function form of BIG2_IS_NMSTRT_CHAR_MINBPC. */
     811             : static int PTRFASTCALL
     812             : big2_isNmstrtMin(const ENCODING *enc, const char *p)
     813             : {
     814             :   return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
     815             : }
     816             : 
     /* NOTE(review): STANDARD_VTABLE(big2_) is defined outside this excerpt;
        presumably it references the wrappers above -- confirm. */
     817             : #undef VTABLE
     818             : #define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
     819             : 
     820             : #else /* not XML_MIN_SIZE */
     821             : 
     822             : #undef PREFIX
     823             : #define PREFIX(ident) big2_ ## ident
     824             : #define MINBPC(enc) 2
     825             : /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
     826             : #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
     827             : #define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
     828             : #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
     /* The n-byte name checks are stubbed to 0 here; only the *_MINBPC forms
        perform a lookup for this encoding. */
     829             : #define IS_NAME_CHAR(enc, p, n) 0
     830             : #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
     831             : #define IS_NMSTRT_CHAR(enc, p, n) (0)
     832             : #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
     833             : 
     834             : #include "xmltok_impl.c"
     835             : 
     /* Remove the hooks again.  IS_INVALID_CHAR is not defined in this branch;
        presumably xmltok_impl.c may define it -- confirm. */
     836             : #undef MINBPC
     837             : #undef BYTE_TYPE
     838             : #undef BYTE_TO_ASCII
     839             : #undef CHAR_MATCHES
     840             : #undef IS_NAME_CHAR
     841             : #undef IS_NAME_CHAR_MINBPC
     842             : #undef IS_NMSTRT_CHAR
     843             : #undef IS_NMSTRT_CHAR_MINBPC
     844             : #undef IS_INVALID_CHAR
     845             : 
     846             : #endif /* not XML_MIN_SIZE */
     847             : 
     /* UTF-16 big-endian encoding object, compiled only when XML_NS is defined.
        The first initializer group is VTABLE followed by the positional values
        2, 0 and a flag that is 1 only when BYTEORDER == 4321.
        NOTE(review): the field names are declared outside this excerpt
        (presumably minBytesPerChar, isUtf8, isUtf16 -- confirm in xmltok.h).
        The second group is the byte-type table taken from asciitab.h and
        latin1tab.h; BT_COLON is not overridden here. */
     848             : #ifdef XML_NS
     849             : 
     850             : static const struct normal_encoding big2_encoding_ns = {
     851             :   { VTABLE, 2, 0,
     852             : #if BYTEORDER == 4321
     853             :   1
     854             : #else
     855             :   0
     856             : #endif
     857             :   },
     858             :   {
     859             : #include "asciitab.h"
     860             : #include "latin1tab.h"
     861             :   },
     862             :   STANDARD_VTABLE(big2_)
     863             : };
     864             : 
     865             : #endif
     866             : 
     /* UTF-16 big-endian encoding object that is always compiled.  Same layout
        as big2_encoding_ns, except that BT_COLON is defined as BT_NMSTRT while
        asciitab.h is included, so whatever entry asciitab.h tags BT_COLON
        (presumably ':') is classified as a name-start character.  The trailing
        flag in the first group is 1 only when BYTEORDER == 4321. */
     867             : static const struct normal_encoding big2_encoding = {
     868             :   { VTABLE, 2, 0,
     869             : #if BYTEORDER == 4321
     870             :   1
     871             : #else
     872             :   0
     873             : #endif
     874             :   },
     875             :   {
     876             : #define BT_COLON BT_NMSTRT
     877             : #include "asciitab.h"
     878             : #undef BT_COLON
     879             : #include "latin1tab.h"
     880             :   },
     881             :   STANDARD_VTABLE(big2_)
     882             : };
     883             : 
     /* "Internal" UTF-16 big-endian encoding objects, compiled unless
        BYTEORDER == 1234.  They differ from the big2_encoding objects in two
        visible ways: the byte-type table comes from iasciitab.h instead of
        asciitab.h, and the trailing flag of the first group is always 1.
        NOTE(review): presumably used for data already in the parser's internal
        UTF-16 form -- confirm against the callers. */
     884             : #if BYTEORDER != 1234
     885             : 
     886             : #ifdef XML_NS
     887             : 
     /* Namespace-aware variant: BT_COLON keeps its own meaning. */
     888             : static const struct normal_encoding internal_big2_encoding_ns = {
     889             :   { VTABLE, 2, 0, 1 },
     890             :   {
     891             : #include "iasciitab.h"
     892             : #include "latin1tab.h"
     893             :   },
     894             :   STANDARD_VTABLE(big2_)
     895             : };
     896             : 
     897             : #endif
     898             : 
     /* Non-namespace variant: BT_COLON is mapped to BT_NMSTRT for the table. */
     899             : static const struct normal_encoding internal_big2_encoding = {
     900             :   { VTABLE, 2, 0, 1 },
     901             :   {
     902             : #define BT_COLON BT_NMSTRT
     903             : #include "iasciitab.h"
     904             : #undef BT_COLON
     905             : #include "latin1tab.h"
     906             :   },
     907             :   STANDARD_VTABLE(big2_)
     908             : };
     909             : 
     910             : #endif
     911             : 
     912             : #undef PREFIX
     913             : 
     914             : static int FASTCALL
     915         372 : streqci(const char *s1, const char *s2)
     916             : {
     917         272 :   for (;;) {
     918         372 :     char c1 = *s1++;
     919         372 :     char c2 = *s2++;
     920         372 :     if (ASCII_a <= c1 && c1 <= ASCII_z)
     921          27 :       c1 += ASCII_A - ASCII_a;
     922         372 :     if (ASCII_a <= c2 && c2 <= ASCII_z)
     923           0 :       c2 += ASCII_A - ASCII_a;
     924         372 :     if (c1 != c2)
     925          75 :       return 0;
     926         297 :     if (!c1)
     927          25 :       break;
     928             :   }
     929          25 :   return 1;
     930             : }
     931             : 
     /* Position-update routine that ignores its enc argument and forwards
        ptr, end and pos to normal_updatePosition using utf8_encoding.
        NOTE(review): presumably installed on the initial auto-detecting
        encoding, where the real encoding is not known yet -- confirm. */
     932             : static void PTRCALL
     933           0 : initUpdatePosition(const ENCODING *enc, const char *ptr,
     934             :                    const char *end, POSITION *pos)
     935             : {
     936           0 :   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
     937           0 : }
     938             : 
     939             : static int
     940         117 : toAscii(const ENCODING *enc, const char *ptr, const char *end)
     941             : {
     942             :   char buf[1];
     943         117 :   char *p = buf;
     944         117 :   XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
     945         117 :   if (p == buf)
     946           0 :     return -1;
     947             :   else
     948         117 :     return buf[0];
     949             : }
     950             : 
     951             : static int FASTCALL
     952          73 : isSpace(int c)
     953             : {
     954          73 :   switch (c) {
     955             :   case 0x20:
     956             :   case 0xD:
     957             :   case 0xA:
     958             :   case 0x9:
     959           7 :     return 1;
     960             :   }
     961          66 :   return 0;
     962             : }
     963             : 
     964             : /* Return 1 if there's just optional white space or there's an S
     965             :    followed by name=val.
                          Parses one pseudo-attribute between ptr and end, stepping by
                          enc->minBytesPerChar and reading characters through toAscii.
                          - namePtr / nameEndPtr: receive the name span; *namePtr is set
                            to NULL when nothing but white space (or nothing) remains.
                          - valPtr: receives the position just after the opening quote.
                          - nextTokPtr: on success with a name, the position just past
                            the closing quote; on failure (return 0), the offending
                            position.  It is not written when the function returns 1
                            with *namePtr == NULL.
     966             : */
     967             : static int
     968          11 : parsePseudoAttribute(const ENCODING *enc,
     969             :                      const char *ptr,
     970             :                      const char *end,
     971             :                      const char **namePtr,
     972             :                      const char **nameEndPtr,
     973             :                      const char **valPtr,
     974             :                      const char **nextTokPtr)
     975             : {
     976             :   int c;
     977             :   char open;
     978          11 :   if (ptr == end) {
     979           4 :     *namePtr = NULL;
     980           4 :     return 1;
     981             :   }
                         /* A pseudo-attribute must be preceded by white space. */
     982           7 :   if (!isSpace(toAscii(enc, ptr, end))) {
     983           0 :     *nextTokPtr = ptr;
     984           0 :     return 0;
     985             :   }
     986             :   do {
     987           7 :     ptr += enc->minBytesPerChar;
     988           7 :   } while (isSpace(toAscii(enc, ptr, end)));
     989           7 :   if (ptr == end) {
     990           0 :     *namePtr = NULL;
     991           0 :     return 1;
     992             :   }
     993           7 :   *namePtr = ptr;
                         /* Scan the name up to '=' (white space may precede the '='). */
     994             :   for (;;) {
     995         111 :     c = toAscii(enc, ptr, end);
     996          59 :     if (c == -1) {
     997           0 :       *nextTokPtr = ptr;
     998           0 :       return 0;
     999             :     }
    1000          59 :     if (c == ASCII_EQUALS) {
    1001           7 :       *nameEndPtr = ptr;
    1002           7 :       break;
    1003             :     }
    1004          52 :     if (isSpace(c)) {
    1005           0 :       *nameEndPtr = ptr;
    1006             :       do {
    1007           0 :         ptr += enc->minBytesPerChar;
    1008           0 :       } while (isSpace(c = toAscii(enc, ptr, end)));
    1009           0 :       if (c != ASCII_EQUALS) {
    1010           0 :         *nextTokPtr = ptr;
    1011           0 :         return 0;
    1012             :       }
    1013           0 :       break;
    1014             :     }
    1015          52 :     ptr += enc->minBytesPerChar;
    1016             :   }
                         /* ptr still at the name start means the '=' came first: empty name. */
    1017           7 :   if (ptr == *namePtr) {
    1018           0 :     *nextTokPtr = ptr;
    1019           0 :     return 0;
    1020             :   }
    1021           7 :   ptr += enc->minBytesPerChar;
    1022           7 :   c = toAscii(enc, ptr, end);
    1023          14 :   while (isSpace(c)) {
    1024           0 :     ptr += enc->minBytesPerChar;
    1025           0 :     c = toAscii(enc, ptr, end);
    1026             :   }
                         /* The value must open with a double or single quote; remember which. */
    1027           7 :   if (c != ASCII_QUOT && c != ASCII_APOS) {
    1028           0 :     *nextTokPtr = ptr;
    1029           0 :     return 0;
    1030             :   }
    1031           7 :   open = (char)c;
    1032           7 :   ptr += enc->minBytesPerChar;
    1033           7 :   *valPtr = ptr;
                         /* Value characters are limited to letters, digits, '.', '-' and '_';
                            anything else, including toAscii's -1, is an error. */
    1034          27 :   for (;; ptr += enc->minBytesPerChar) {
    1035          61 :     c = toAscii(enc, ptr, end);
    1036          34 :     if (c == open)
    1037           7 :       break;
    1038          27 :     if (!(ASCII_a <= c && c <= ASCII_z)
    1039          18 :         && !(ASCII_A <= c && c <= ASCII_Z)
    1040          18 :         && !(ASCII_0 <= c && c <= ASCII_9)
    1041           7 :         && c != ASCII_PERIOD
    1042           3 :         && c != ASCII_MINUS
    1043           0 :         && c != ASCII_UNDERSCORE) {
    1044           0 :       *nextTokPtr = ptr;
    1045           0 :       return 0;
    1046             :     }
    1047             :   }
    1048           7 :   *nextTokPtr = ptr + enc->minBytesPerChar;
    1049           7 :   return 1;
    1050             : }
    1051             : 
    /* NUL-terminated keyword spellings used when matching pseudo-attribute
       names and values in doParseXmlDecl.  They are assembled from ASCII_*
       constants instead of string literals.
       NOTE(review): presumably so the tables hold ASCII code values whatever
       the compiler's execution character set is -- the ASCII_* definitions are
       outside this excerpt, confirm there. */
    1052             : static const char KW_version[] = {
    1053             :   ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'
    1054             : };
    1055             : 
    1056             : static const char KW_encoding[] = {
    1057             :   ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'
    1058             : };
    1059             : 
    1060             : static const char KW_standalone[] = {
    1061             :   ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o,
    1062             :   ASCII_n, ASCII_e, '\0'
    1063             : };
    1064             : 
    /* The two accepted values of the standalone pseudo-attribute. */
    1065             : static const char KW_yes[] = {
    1066             :   ASCII_y, ASCII_e, ASCII_s,  '\0'
    1067             : };
    1068             : 
    1069             : static const char KW_no[] = {
    1070             :   ASCII_n, ASCII_o,  '\0'
    1071             : };
    1072             : 
    /* The only version value doParseXmlDecl accepts: "1.0". */
    1073             : /* BEGIN MOZILLA CHANGE (http://bugzilla.mozilla.org/show_bug.cgi?id=62157) */
    1074             : static const char KW_XML_1_0[] = {
    1075             :   ASCII_1, ASCII_PERIOD, ASCII_0, '\0'
    1076             : };
    1077             : /* END MOZILLA CHANGE */
    1078             : 
    /* Parse the pseudo-attributes of an XML declaration or text declaration
       held in [ptr, end), in the fixed order version, encoding, standalone.

       - encodingFinder: called as encodingFinder(enc, val, valEnd) to resolve
         the encoding name; its result is stored in *encoding.
       - isGeneralTextEntity: when non-zero, version may be absent, an encoding
         pseudo-attribute is required after version, and standalone is rejected.
       - enc: encoding of the declaration text itself.
       - badPtr: receives the position of the error when 0 is returned.
       - versionPtr, versionEndPtr, encodingName, encoding, standalone:
         optional outputs; each is written only when non-NULL and when the
         corresponding pseudo-attribute is present.

       Returns 1 on success and 0 on a malformed declaration. */
    1079             : static int
    1080           4 : doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
    1081             :                                                  const char *,
    1082             :                                                  const char *),
    1083             :                int isGeneralTextEntity,
    1084             :                const ENCODING *enc,
    1085             :                const char *ptr,
    1086             :                const char *end,
    1087             :                const char **badPtr,
    1088             :                const char **versionPtr,
    1089             :                const char **versionEndPtr,
    1090             :                const char **encodingName,
    1091             :                const ENCODING **encoding,
    1092             :                int *standalone)
    1093             : {
    1094           4 :   const char *val = NULL;
    1095           4 :   const char *name = NULL;
    1096           4 :   const char *nameEnd = NULL;
                         /* Trim five characters from the front and two from the back.
                            NOTE(review): presumably the "<?xml" and "?>" delimiters --
                            the caller's token boundaries are not visible here. */
    1097           4 :   ptr += 5 * enc->minBytesPerChar;
    1098           4 :   end -= 2 * enc->minBytesPerChar;
    1099           4 :   if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
    1100           4 :       || !name) {
    1101           0 :     *badPtr = ptr;
    1102           0 :     return 0;
    1103             :   }
                         /* A first pseudo-attribute other than version is tolerated only
                            for text declarations; it is then re-examined as encoding. */
    1104           4 :   if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
    1105           0 :     if (!isGeneralTextEntity) {
    1106           0 :       *badPtr = name;
    1107           0 :       return 0;
    1108             :     }
    1109             :   }
    1110             :   else {
    1111           4 :     if (versionPtr)
    1112           4 :       *versionPtr = val;
    1113           4 :     if (versionEndPtr)
    1114           4 :       *versionEndPtr = ptr;
    1115             : /* BEGIN MOZILLA CHANGE (http://bugzilla.mozilla.org/show_bug.cgi?id=62157) */
    1116             :      /* Anything else but a version="1.0" is invalid for us, until we support later versions. */
    1117           4 :      if (!XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_XML_1_0)) {
    1118           0 :        *badPtr = val;
    1119           0 :        return 0;
    1120             :      }
    1121             : /* END MOZILLA CHANGE */
    1122           4 :     if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
    1123           0 :       *badPtr = ptr;
    1124           0 :       return 0;
    1125             :     }
    1126           4 :     if (!name) {
    1127           1 :       if (isGeneralTextEntity) {
    1128             :         /* a TextDecl must have an EncodingDecl */
    1129           0 :         *badPtr = ptr;
    1130           0 :         return 0;
    1131             :       }
    1132           1 :       return 1;
    1133             :     }
    1134             :   }
                         /* Optional encoding pseudo-attribute; its value must start with
                            an ASCII letter. */
    1135           3 :   if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
    1136           3 :     int c = toAscii(enc, val, end);
    1137           3 :     if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) {
    1138           0 :       *badPtr = val;
    1139           0 :       return 0;
    1140             :     }
    1141           3 :     if (encodingName)
    1142           3 :       *encodingName = val;
                           /* ptr is just past the closing quote, so step back one character
                              to get the end of the value. */
    1143           3 :     if (encoding)
    1144           3 :       *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
    1145           3 :     if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
    1146           0 :       *badPtr = ptr;
    1147           0 :       return 0;
    1148             :     }
    1149           3 :     if (!name)
    1150           3 :       return 1;
    1151             :   }
                         /* Whatever remains must be standalone, which is not allowed in a
                            text declaration. */
    1152           0 :   if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
    1153           0 :       || isGeneralTextEntity) {
    1154           0 :     *badPtr = name;
    1155           0 :     return 0;
    1156             :   }
    1157           0 :   if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
    1158           0 :     if (standalone)
    1159           0 :       *standalone = 1;
    1160             :   }
    1161           0 :   else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
    1162           0 :     if (standalone)
    1163           0 :       *standalone = 0;
    1164             :   }
    1165             :   else {
    1166           0 :     *badPtr = val;
    1167           0 :     return 0;
    1168             :   }
                         /* Only trailing white space may follow the standalone value. */
    1169           0 :   while (isSpace(toAscii(enc, ptr, end)))
    1170           0 :     ptr += enc->minBytesPerChar;
    1171           0 :   if (ptr != end) {
    1172           0 :     *badPtr = ptr;
    1173           0 :     return 0;
    1174             :   }
    1175           0 :   return 1;
    1176             : }
    1177             : 
    1178             : static int FASTCALL
    1179           0 : checkCharRefNumber(int result)
    1180             : {
    1181           0 :   switch (result >> 8) {
    1182             :   case 0xD8: case 0xD9: case 0xDA: case 0xDB:
    1183             :   case 0xDC: case 0xDD: case 0xDE: case 0xDF:
    1184           0 :     return -1;
    1185             :   case 0:
    1186           0 :     if (latin1_encoding.type[result] == BT_NONXML)
    1187           0 :       return -1;
    1188           0 :     break;
    1189             :   case 0xFF:
    1190           0 :     if (result == 0xFFFE || result == 0xFFFF)
    1191           0 :       return -1;
    1192           0 :     break;
    1193             :   }
    1194           0 :   return result;
    1195             : }
    1196             : 
    1197             : int FASTCALL
    1198           0 : XmlUtf8Encode(int c, char *buf)
    1199             : {
    1200             :   enum {
    1201             :     /* minN is minimum legal resulting value for N byte sequence */
    1202             :     min2 = 0x80,
    1203             :     min3 = 0x800,
    1204             :     min4 = 0x10000
    1205             :   };
    1206             : 
    1207           0 :   if (c < 0)
    1208           0 :     return 0;
    1209           0 :   if (c < min2) {
    1210           0 :     buf[0] = (char)(c | UTF8_cval1);
    1211           0 :     return 1;
    1212             :   }
    1213           0 :   if (c < min3) {
    1214           0 :     buf[0] = (char)((c >> 6) | UTF8_cval2);
    1215           0 :     buf[1] = (char)((c & 0x3f) | 0x80);
    1216           0 :     return 2;
    1217             :   }
    1218           0 :   if (c < min4) {
    1219           0 :     buf[0] = (char)((c >> 12) | UTF8_cval3);
    1220           0 :     buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
    1221           0 :     buf[2] = (char)((c & 0x3f) | 0x80);
    1222           0 :     return 3;
    1223             :   }
    1224           0 :   if (c < 0x110000) {
    1225           0 :     buf[0] = (char)((c >> 18) | UTF8_cval4);
    1226           0 :     buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
    1227           0 :     buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
    1228           0 :     buf[3] = (char)((c & 0x3f) | 0x80);
    1229           0 :     return 4;
    1230             :   }
    1231           0 :   return 0;
    1232             : }
    1233             : 
    1234             : int FASTCALL
    1235           0 : XmlUtf16Encode(int charNum, unsigned short *buf)
    1236             : {
    1237           0 :   if (charNum < 0)
    1238           0 :     return 0;
    1239           0 :   if (charNum < 0x10000) {
    1240           0 :     buf[0] = (unsigned short)charNum;
    1241           0 :     return 1;
    1242             :   }
    1243           0 :   if (charNum < 0x110000) {
    1244           0 :     charNum -= 0x10000;
    1245           0 :     buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
    1246           0 :     buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
    1247           0 :     return 2;
    1248             :   }
    1249           0 :   return 0;
    1250             : }
    1251             : 
    /* Encoding object driven by a caller-supplied conversion callback.
       `normal` has to stay the first member: AS_UNKNOWN_ENCODING casts an
       ENCODING pointer straight to this struct, and the same pointer is also
       used through AS_NORMAL_ENCODING by the converters below. */
    1252             : struct unknown_encoding {
    1253             :   struct normal_encoding normal;
                         /* Callback (and its opaque argument) used for bytes that have no
                            precomputed table entry. */
    1254             :   CONVERTER convert;
    1255             :   void *userData;
                         /* utf16[b]: UTF-16 unit for byte b; 0 makes unknown_toUtf16 call
                            convert instead. */
    1256             :   unsigned short utf16[256];
                         /* utf8[b][0]: number of UTF-8 bytes that follow in utf8[b][1..];
                            0 makes unknown_toUtf8 call convert instead. */
    1257             :   char utf8[256][4];
    1258             : };
    1259             : 
    /* View an ENCODING pointer as the enclosing unknown_encoding (read-only). */
    1260             : #define AS_UNKNOWN_ENCODING(enc)  ((const struct unknown_encoding *) (enc))
    1261             : 
    1262             : int
    1263           0 : XmlSizeOfUnknownEncoding(void)
    1264             : {
    1265           0 :   return sizeof(struct unknown_encoding);
    1266             : }
    1267             : 
    1268             : static int PTRFASTCALL
    1269           0 : unknown_isName(const ENCODING *enc, const char *p)
    1270             : {
    1271           0 :   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
    1272           0 :   int c = uenc->convert(uenc->userData, p);
    1273           0 :   if (c & ~0xFFFF)
    1274           0 :     return 0;
    1275           0 :   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
    1276             : }
    1277             : 
    1278             : static int PTRFASTCALL
    1279           0 : unknown_isNmstrt(const ENCODING *enc, const char *p)
    1280             : {
    1281           0 :   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
    1282           0 :   int c = uenc->convert(uenc->userData, p);
    1283           0 :   if (c & ~0xFFFF)
    1284           0 :     return 0;
    1285           0 :   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
    1286             : }
    1287             : 
    1288             : static int PTRFASTCALL
    1289           0 : unknown_isInvalid(const ENCODING *enc, const char *p)
    1290             : {
    1291           0 :   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
    1292           0 :   int c = uenc->convert(uenc->userData, p);
    1293           0 :   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
    1294             : }
    1295             : 
    1296             : static void PTRCALL
    1297           0 : unknown_toUtf8(const ENCODING *enc,
    1298             :                const char **fromP, const char *fromLim,
    1299             :                char **toP, const char *toLim)
    1300             : {
    1301           0 :   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
    1302             :   char buf[XML_UTF8_ENCODE_MAX];
    1303           0 :   for (;;) {
    1304             :     const char *utf8;
    1305             :     int n;
    1306           0 :     if (*fromP == fromLim)
    1307           0 :       break;
    1308           0 :     utf8 = uenc->utf8[(unsigned char)**fromP];
    1309           0 :     n = *utf8++;
    1310           0 :     if (n == 0) {
    1311           0 :       int c = uenc->convert(uenc->userData, *fromP);
    1312           0 :       n = XmlUtf8Encode(c, buf);
    1313           0 :       if (n > toLim - *toP)
    1314           0 :         break;
    1315           0 :       utf8 = buf;
    1316           0 :       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
    1317             :                  - (BT_LEAD2 - 2));
    1318             :     }
    1319             :     else {
    1320           0 :       if (n > toLim - *toP)
    1321           0 :         break;
    1322           0 :       (*fromP)++;
    1323             :     }
    1324             :     do {
    1325           0 :       *(*toP)++ = *utf8++;
    1326           0 :     } while (--n != 0);
    1327             :   }
    1328           0 : }
    1329             : 
    1330             : static void PTRCALL
    1331           0 : unknown_toUtf16(const ENCODING *enc,
    1332             :                 const char **fromP, const char *fromLim,
    1333             :                 unsigned short **toP, const unsigned short *toLim)
    1334             : {
    1335           0 :   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
    1336           0 :   while (*fromP != fromLim && *toP != toLim) {
    1337           0 :     unsigned short c = uenc->utf16[(unsigned char)**fromP];
    1338           0 :     if (c == 0) {
    1339           0 :       c = (unsigned short)
    1340           0 :           uenc->convert(uenc->userData, *fromP);
    1341           0 :       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
    1342             :                  - (BT_LEAD2 - 2));
    1343             :     }
    1344             :     else
    1345           0 :       (*fromP)++;
    1346           0 :     *(*toP)++ = c;
    1347             :   }
    1348           0 : }
    1349             : /* Build a user-defined encoding in caller-supplied memory.
                     :
                     :    mem       storage that is filled in as a struct unknown_encoding.
                     :    table     indexed by byte value i (0..255); table[i] is one of:
                     :                -1          byte is malformed (BT_MALFORM)
                     :                -2 .. -4    byte starts a multi-byte sequence; its type
                     :                            becomes BT_LEAD2 - (c + 2)
                     :                below -4    the whole table is rejected
                     :                0 .. 0x7F   byte maps to that character and takes the
                     :                            Latin-1 byte type of that character
                     :                >= 0x80     byte maps to that code point; values refused
                     :                            by checkCharRefNumber become BT_NONXML, any
                     :                            other value above 0xFFFF rejects the table
                     :    convert   stored in the encoding; unknown_toUtf16 calls it for bytes
                     :              whose utf16 entry is 0 (the multi-byte lead bytes).
                     :    userData  stored and handed back to convert.
                     :
                     :    Returns the ENCODING embedded in mem, or 0 if table is rejected.
                     :
                     :    NOTE(review): a table with multi-byte entries is accepted even when
                     :    convert is NULL, yet unknown_toUtf16 calls uenc->convert for such
                     :    bytes without a NULL check -- confirm callers never pass that
                     :    combination, or reject it here.
                     : */
    1350             : ENCODING *
    1351           0 : XmlInitUnknownEncoding(void *mem,
    1352             :                        int *table,
    1353             :                        CONVERTER convert, 
    1354             :                        void *userData)
    1355             : {
    1356             :   int i;
    1357           0 :   struct unknown_encoding *e = (struct unknown_encoding *)mem;
    1358           0 :   for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
    1359           0 :     ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; /* start from a byte copy of latin1_encoding */
    1360           0 :   for (i = 0; i < 128; i++)
    1361           0 :     if (latin1_encoding.type[i] != BT_OTHER
    1362           0 :         && latin1_encoding.type[i] != BT_NONXML
    1363           0 :         && table[i] != i)
    1364           0 :       return 0; /* bytes below 128 with any other Latin-1 type must map to themselves */
    1365           0 :   for (i = 0; i < 256; i++) {
    1366           0 :     int c = table[i];
    1367           0 :     if (c == -1) {
    1368           0 :       e->normal.type[i] = BT_MALFORM;
    1369             :       /* This shouldn't really get used. */
    1370           0 :       e->utf16[i] = 0xFFFF;
    1371           0 :       e->utf8[i][0] = 1;
    1372           0 :       e->utf8[i][1] = 0;
    1373             :     }
    1374           0 :     else if (c < 0) {
    1375           0 :       if (c < -4)
    1376           0 :         return 0;
    1377           0 :       e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); /* -2 -> BT_LEAD2, -3 -> BT_LEAD2 + 1, -4 -> BT_LEAD2 + 2 */
    1378           0 :       e->utf8[i][0] = 0; /* zero length and zero unit send the converters to convert() */
    1379           0 :       e->utf16[i] = 0;
    1380             :     }
    1381           0 :     else if (c < 0x80) {
    1382           0 :       if (latin1_encoding.type[c] != BT_OTHER
    1383           0 :           && latin1_encoding.type[c] != BT_NONXML
    1384           0 :           && c != i)
    1385           0 :         return 0;
    1386           0 :       e->normal.type[i] = latin1_encoding.type[c];
    1387           0 :       e->utf8[i][0] = 1; /* utf8[i][0] holds the byte count, the bytes follow */
    1388           0 :       e->utf8[i][1] = (char)c;
    1389           0 :       e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); /* 0 is reserved to mean "call convert" */
    1390             :     }
    1391           0 :     else if (checkCharRefNumber(c) < 0) {
    1392           0 :       e->normal.type[i] = BT_NONXML;
    1393             :       /* This shouldn't really get used. */
    1394           0 :       e->utf16[i] = 0xFFFF;
    1395           0 :       e->utf8[i][0] = 1;
    1396           0 :       e->utf8[i][1] = 0;
    1397             :     }
    1398             :     else {
    1399           0 :       if (c > 0xFFFF)
    1400           0 :         return 0;
    1401           0 :       if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
    1402           0 :         e->normal.type[i] = BT_NMSTRT;
    1403           0 :       else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
    1404           0 :         e->normal.type[i] = BT_NAME;
    1405             :       else
    1406           0 :         e->normal.type[i] = BT_OTHER;
    1407           0 :       e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); /* encoded bytes go after the count byte */
    1408           0 :       e->utf16[i] = (unsigned short)c;
    1409             :     }
    1410             :   }
    1411           0 :   e->userData = userData;
    1412           0 :   e->convert = convert;
    1413           0 :   if (convert) { /* multi-byte name/validity checks are only replaced when a converter exists */
    1414           0 :     e->normal.isName2 = unknown_isName;
    1415           0 :     e->normal.isName3 = unknown_isName;
    1416           0 :     e->normal.isName4 = unknown_isName;
    1417           0 :     e->normal.isNmstrt2 = unknown_isNmstrt;
    1418           0 :     e->normal.isNmstrt3 = unknown_isNmstrt;
    1419           0 :     e->normal.isNmstrt4 = unknown_isNmstrt;
    1420           0 :     e->normal.isInvalid2 = unknown_isInvalid;
    1421           0 :     e->normal.isInvalid3 = unknown_isInvalid;
    1422           0 :     e->normal.isInvalid4 = unknown_isInvalid;
    1423             :   }
    1424           0 :   e->normal.enc.utf8Convert = unknown_toUtf8;
    1425           0 :   e->normal.enc.utf16Convert = unknown_toUtf16;
    1426           0 :   return &(e->normal.enc);
    1427             : }
    1428             : 
    1429             : /* Indices of the built-in encodings.  If this enumeration is changed,
    1430             : getEncodingIndex and encodings must also be changed.
                     :    getEncodingIndex returns the position of a name in its
                     :    encodingNames array, so the values from ISO_8859_1_ENC to
                     :    UTF_16LE_ENC must follow the order of that array. */
    1431             : enum {
    1432             :   UNKNOWN_ENC = -1, /* returned by getEncodingIndex when no name matches */
    1433             :   ISO_8859_1_ENC = 0,
    1434             :   US_ASCII_ENC,
    1435             :   UTF_8_ENC,
    1436             :   UTF_16_ENC,
    1437             :   UTF_16BE_ENC,
    1438             :   UTF_16LE_ENC,
    1439             :   /* must match encodingNames up to here */
    1440             :   NO_ENC /* returned by getEncodingIndex when the name is NULL */
    1441             : };
    1442             : /* NUL-terminated names of the built-in encodings, spelled out with the
                     :    ASCII_* constants.  They are the entries of encodingNames in
                     :    getEncodingIndex.
                     :    NOTE(review): presumably spelled this way so the bytes are ASCII
                     :    whatever the compiler's execution character set is -- confirm.
                     : */
    1443             : static const char KW_ISO_8859_1[] = {
    1444             :   ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9,
    1445             :   ASCII_MINUS, ASCII_1, '\0'
    1446             : };
    1447             : static const char KW_US_ASCII[] = {
    1448             :   ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I,
    1449             :   '\0'
    1450             : };
    1451             : static const char KW_UTF_8[] =  {
    1452             :   ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'
    1453             : };
    1454             : static const char KW_UTF_16[] = {
    1455             :   ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'
    1456             : };
    1457             : static const char KW_UTF_16BE[] = {
    1458             :   ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E,
    1459             :   '\0'
    1460             : };
    1461             : static const char KW_UTF_16LE[] = {
    1462             :   ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E,
    1463             :   '\0'
    1464             : };
    1465             : /* Map an encoding name to its index in the enumeration above.
                     :
                     :    name  encoding name, or NULL.
                     :
                     :    Returns NO_ENC when name is NULL, the index of the first
                     :    encodingNames entry for which streqci reports a match, or
                     :    UNKNOWN_ENC when no entry matches.
                     :    NOTE(review): streqci is defined outside this view; from its name it
                     :    is presumably a case-insensitive comparison -- confirm.
                     : */
    1466             : static int FASTCALL
    1467          47 : getEncodingIndex(const char *name)
    1468             : {
    1469             :   static const char * const encodingNames[] = { /* order must match the enumeration */
    1470             :     KW_ISO_8859_1,
    1471             :     KW_US_ASCII,
    1472             :     KW_UTF_8,
    1473             :     KW_UTF_16,
    1474             :     KW_UTF_16BE,
    1475             :     KW_UTF_16LE,
    1476             :   };
    1477             :   int i;
    1478          47 :   if (name == NULL)
    1479          22 :     return NO_ENC;
    1480          97 :   for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
    1481          97 :     if (streqci(name, encodingNames[i]))
    1482          25 :       return i; /* the array position doubles as the enumeration value */
    1483           0 :   return UNKNOWN_ENC;
    1484             : }
    1485             : 
    1486             : /* For binary compatibility, we store the index of the encoding
    1487             :    specified at initialization in the isUtf16 member.
                     :
                     :    INIT_ENC_INDEX(enc) reads that member back as an int;
                     :    SET_INIT_ENC_INDEX(enc, i) stores i after casting it to char.
                     :    NOTE(review): the argument i is not parenthesized in the expansion,
                     :    so the cast binds only to the first operand of a compound argument.
    1488             : */
    1489             : 
    1490             : #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
    1491             : #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i)
    1492             : 
    1493             : /* This is what detects the encoding.  encodingTable maps from
    1494             :    encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
    1495             :    the external (protocol) specified encoding; state is
    1496             :    XML_CONTENT_STATE if we're parsing an external text entity, and
    1497             :    XML_PROLOG_STATE otherwise.
                     :
                     :    ptr and end delimit the bytes available so far.  The function looks
                     :    at the first one or two bytes (three for a UTF-8 BOM) and:
                     :      - returns XML_TOK_NONE if there is no input at all;
                     :      - returns XML_TOK_PARTIAL if more bytes are needed to decide;
                     :      - returns XML_TOK_BOM after storing the detected encoding through
                     :        enc->encPtr and setting *nextTokPtr just past the BOM;
                     :      - otherwise stores the chosen encoding through enc->encPtr and
                     :        returns whatever XmlTok yields for it.
    1498             : */
    1499             : 
    1500             : 
    1501             : static int
    1502          22 : initScan(const ENCODING * const *encodingTable,
    1503             :          const INIT_ENCODING *enc,
    1504             :          int state,
    1505             :          const char *ptr,
    1506             :          const char *end,
    1507             :          const char **nextTokPtr)
    1508             : {
    1509             :   const ENCODING **encPtr;
    1510             : 
    1511          22 :   if (ptr == end)
    1512           0 :     return XML_TOK_NONE;
    1513          22 :   encPtr = enc->encPtr;
    1514          22 :   if (ptr + 1 == end) {
    1515             :     /* only a single byte available for auto-detection */
    1516             : #ifndef XML_DTD /* FIXME */
    1517             :     /* a well-formed document entity must have more than one byte */
    1518             :     if (state != XML_CONTENT_STATE)
    1519             :       return XML_TOK_PARTIAL;
    1520             : #endif
    1521             :     /* so we're parsing an external text entity... */
    1522             :     /* if UTF-16 was externally specified, then we need at least 2 bytes */
    1523           0 :     switch (INIT_ENC_INDEX(enc)) {
    1524             :     case UTF_16_ENC:
    1525             :     case UTF_16LE_ENC:
    1526             :     case UTF_16BE_ENC:
    1527           0 :       return XML_TOK_PARTIAL;
    1528             :     }
    1529           0 :     switch ((unsigned char)*ptr) {
    1530             :     case 0xFE:
    1531             :     case 0xFF:
    1532             :     case 0xEF: /* possibly first byte of UTF-8 BOM */
    1533           0 :       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
    1534           0 :           && state == XML_CONTENT_STATE)
    1535           0 :         break;
    1536             :       /* fall through */
    1537             :     case 0x00:
    1538             :     case 0x3C:
    1539           0 :       return XML_TOK_PARTIAL; /* these bytes start two-byte patterns tested below; wait for more */
    1540             :     }
    1541             :   }
    1542             :   else {
    1543          22 :     switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { /* first byte in the high half */
    1544             :     case 0xFEFF: /* bytes FE FF: big-endian UTF-16 BOM */
    1545           0 :       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
    1546           0 :           && state == XML_CONTENT_STATE)
    1547           0 :         break;
    1548           0 :       *nextTokPtr = ptr + 2;
    1549           0 :       *encPtr = encodingTable[UTF_16BE_ENC];
    1550           0 :       return XML_TOK_BOM;
    1551             :     /* 00 3C is handled in the default case */
    1552             :     case 0x3C00: /* bytes 3C 00: tokenized as little-endian UTF-16 without consuming anything */
    1553          22 :       if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
    1554          22 :            || INIT_ENC_INDEX(enc) == UTF_16_ENC)
    1555          22 :           && state == XML_CONTENT_STATE)
    1556           0 :         break;
    1557          22 :       *encPtr = encodingTable[UTF_16LE_ENC];
    1558          22 :       return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    1559             :     case 0xFFFE: /* bytes FF FE: little-endian UTF-16 BOM */
    1560           0 :       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
    1561           0 :           && state == XML_CONTENT_STATE)
    1562           0 :         break;
    1563           0 :       *nextTokPtr = ptr + 2;
    1564           0 :       *encPtr = encodingTable[UTF_16LE_ENC];
    1565           0 :       return XML_TOK_BOM;
    1566             :     case 0xEFBB:
    1567             :       /* Maybe a UTF-8 BOM (EF BB BF) */
    1568             :       /* If there's an explicitly specified (external) encoding
    1569             :          of ISO-8859-1 or some flavour of UTF-16
    1570             :          and this is an external text entity,
    1571             :          don't look for the BOM,
    1572             :          because it might be legal data.
    1573             :       */
    1574           0 :       if (state == XML_CONTENT_STATE) {
    1575           0 :         int e = INIT_ENC_INDEX(enc);
    1576           0 :         if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
    1577           0 :             || e == UTF_16LE_ENC || e == UTF_16_ENC)
    1578             :           break;
    1579             :       }
    1580           0 :       if (ptr + 2 == end)
    1581           0 :         return XML_TOK_PARTIAL; /* third BOM byte not available yet */
    1582           0 :       if ((unsigned char)ptr[2] == 0xBF) {
    1583           0 :         *nextTokPtr = ptr + 3;
    1584           0 :         *encPtr = encodingTable[UTF_8_ENC];
    1585           0 :         return XML_TOK_BOM;
    1586             :       }
    1587           0 :       break;
    1588             :     default:
    1589           0 :       if (ptr[0] == '\0') {
    1590             :         /* 0 isn't a legal data character. Furthermore a document
    1591             :            entity can only start with ASCII characters.  So the only
    1592             :            way this can fail to be big-endian UTF-16 is if it's an
    1593             :            external parsed general entity that's labelled as
    1594             :            UTF-16LE.
    1595             :         */
    1596           0 :         if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
    1597           0 :           break;
    1598           0 :         *encPtr = encodingTable[UTF_16BE_ENC];
    1599           0 :         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    1600             :       }
    1601           0 :       else if (ptr[1] == '\0') {
    1602             :         /* We could recover here in the case:
    1603             :             - parsing an external entity
    1604             :             - second byte is 0
    1605             :             - no externally specified encoding
    1606             :             - no encoding declaration
    1607             :            by assuming UTF-16LE.  But we don't, because this would mean when
    1608             :            presented just with a single byte, we couldn't reliably determine
    1609             :            whether we needed further bytes.
    1610             :         */
    1611           0 :         if (state == XML_CONTENT_STATE)
    1612           0 :           break;
    1613           0 :         *encPtr = encodingTable[UTF_16LE_ENC];
    1614           0 :         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    1615             :       }
    1616           0 :       break;
    1617             :     }
    1618             :   }
                     : /* Nothing was detected: use the encoding recorded at initialization.
                     :    NOTE(review): this indexes encodingTable with INIT_ENC_INDEX(enc)
                     :    unchecked, so it assumes the stored index is never UNKNOWN_ENC (-1)
                     :    and that the table has an entry for it -- confirm where it is set. */
    1619           0 :   *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
    1620           0 :   return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    1621             : }
    1622             : 
    1623             : /* First inclusion of xmltok_ns.c: NS(x) and ns(x) both expand to x, so
                     :    the names it builds with these macros carry no suffix. */
    1624             : #define NS(x) x
    1625             : #define ns(x) x
    1626             : #include "xmltok_ns.c"
    1627             : #undef NS
    1628             : #undef ns
    1629             : 
    1630             : #ifdef XML_NS
    1631             : /* Second inclusion of xmltok_ns.c, only when XML_NS is defined: NS(x)
                     :    appends "NS" and ns(x) appends "_ns" to the names it builds. */
    1632             : #define NS(x) x ## NS
    1633             : #define ns(x) x ## _ns
    1634             : 
    1635             : #include "xmltok_ns.c"
    1636             : 
    1637             : #undef NS
    1638             : #undef ns
    1639             : /* Variant of XmlInitUnknownEncoding available when XML_NS is defined.
                     :
                     :    Takes the same arguments and forwards them unchanged.  On success it
                     :    also sets the byte type of ASCII_COLON to BT_COLON; when
                     :    XmlInitUnknownEncoding returns 0, that 0 is returned untouched.
                     :    NOTE(review): the cast from ENCODING * to struct normal_encoding *
                     :    relies on enc being the first member of struct normal_encoding --
                     :    confirm against the struct definition.
                     : */
    1640             : ENCODING *
    1641           0 : XmlInitUnknownEncodingNS(void *mem,
    1642             :                          int *table,
    1643             :                          CONVERTER convert, 
    1644             :                          void *userData)
    1645             : {
    1646           0 :   ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
    1647           0 :   if (enc)
    1648           0 :     ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
    1649           0 :   return enc;
    1650             : }
    1651             : 
    1652             : #endif /* XML_NS */
    1653             : 
    1654             : /* BEGIN MOZILLA CHANGE (Mozilla extensions for QName checking) */
    1655             : #ifdef MOZILLA_CLIENT
    1656             : #include "moz_extensions.c"
    1657             : #endif /* MOZILLA_CLIENT */
    1658             : /* END MOZILLA CHANGE */

Generated by: LCOV version 1.13