Home | History | Annotate | Line # | Download | only in lib
      1 /*
      2                             __  __            _
      3                          ___\ \/ /_ __   __ _| |_
      4                         / _ \\  /| '_ \ / _` | __|
      5                        |  __//  \| |_) | (_| | |_
      6                         \___/_/\_\ .__/ \__,_|\__|
      7                                  |_| XML parser
      8 
      9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
     10    Copyright (c) 2000      Clark Cooper <coopercc (at) users.sourceforge.net>
     11    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net>
     12    Copyright (c) 2002      Greg Stein <gstein (at) users.sourceforge.net>
     13    Copyright (c) 2002-2016 Karl Waclawek <karl (at) waclawek.net>
     14    Copyright (c) 2005-2009 Steven Solie <steven (at) solie.ca>
     15    Copyright (c) 2016-2024 Sebastian Pipping <sebastian (at) pipping.org>
     16    Copyright (c) 2016      Pascal Cuoq <cuoq (at) trust-in-soft.com>
     17    Copyright (c) 2016      Don Lewis <truckman (at) apache.org>
     18    Copyright (c) 2017      Rhodri James <rhodri (at) wildebeest.org.uk>
     19    Copyright (c) 2017      Alexander Bluhm <alexander.bluhm (at) gmx.net>
     20    Copyright (c) 2017      Benbuck Nason <bnason (at) netflix.com>
     21    Copyright (c) 2017      José Gutiérrez de la Concha <jose (at) zeroc.com>
     22    Copyright (c) 2019      David Loffredo <loffredo (at) steptools.com>
     23    Copyright (c) 2021      Donghee Na <donghee.na (at) python.org>
     24    Copyright (c) 2022      Martin Ettl <ettl.martin78 (at) googlemail.com>
     25    Copyright (c) 2022      Sean McBride <sean (at) rogue-research.com>
     26    Copyright (c) 2023      Hanno Böck <hanno (at) gentoo.org>
     27    Copyright (c) 2025      Alfonso Gregory <gfunni234 (at) gmail.com>
     28    Licensed under the MIT license:
     29 
     30    Permission is  hereby granted,  free of charge,  to any  person obtaining
     31    a  copy  of  this  software   and  associated  documentation  files  (the
     32    "Software"),  to  deal in  the  Software  without restriction,  including
     33    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
     34    distribute, sublicense, and/or sell copies of the Software, and to permit
     35    persons  to whom  the Software  is  furnished to  do so,  subject to  the
     36    following conditions:
     37 
     38    The above copyright  notice and this permission notice  shall be included
     39    in all copies or substantial portions of the Software.
     40 
     41    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
     42    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
     43    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
     44    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
     45    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
     46    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     47    USE OR OTHER DEALINGS IN THE SOFTWARE.
     48 */
     49 
     50 #include "expat_config.h"
     51 
     52 #include <stddef.h>
     53 #include <string.h> /* memcpy */
     54 #include <stdbool.h>
     55 
     56 #ifdef _WIN32
     57 #  include "winconfig.h"
     58 #endif
     59 
     60 #include "internal.h"
     61 #include "xmltok.h"
     62 #include "nametab.h"
     63 
/* Optional extra entry for the first scanner list built by VTABLE1.
   With XML_DTD defined it contributes ", PREFIX(ignoreSectionTok)";
   without it the macro expands to nothing at all. */
#ifndef XML_DTD
#  define IGNORE_SECTION_TOK_VTABLE /* empty */
#else
#  define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#endif
     69 
/* Positional initializer list: two brace-enclosed scanner lists followed by
   eight helper functions, each named through PREFIX().  The first list gains
   an extra entry when IGNORE_SECTION_TOK_VTABLE is non-empty. */
#define VTABLE1                                                                \
  {                                                                            \
      PREFIX(prologTok),                                                       \
      PREFIX(contentTok),                                                      \
      PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE                        \
  },                                                                           \
  {                                                                            \
      PREFIX(attributeValueTok),                                               \
      PREFIX(entityValueTok)                                                   \
  },                                                                           \
  PREFIX(nameMatchesAscii),                                                    \
  PREFIX(nameLength),                                                          \
  PREFIX(skipS),                                                               \
  PREFIX(getAtts),                                                             \
  PREFIX(charRefNumber),                                                       \
  PREFIX(predefinedEntityName),                                                \
  PREFIX(updatePosition),                                                      \
  PREFIX(isPublicId)

/* VTABLE1 extended with the two PREFIX()-named converters. */
#define VTABLE                                                                 \
  VTABLE1,                                                                     \
  PREFIX(toUtf8),                                                              \
  PREFIX(toUtf16)
     79 
/* Test one bit of namingBitmap for a 16-bit character given as two bytes.
   pages[hi] selects a group of eight bitmap words, the top three bits of lo
   pick the word inside that group and the low five bits of lo pick the bit.
   Yields a non-zero value when the bit is set.
   Every parameter is parenthesized so that expression arguments (for example
   "table + offset" for pages) expand correctly. */
#define UCS2_GET_NAMING(pages, hi, lo)                                         \
  (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
     82 
/* Bitmap lookup for a 2-byte UTF-8 sequence.  The character has 11 payload
   bits: the low 5 bits of byte[0] followed by the low 6 bits of byte[1].
     - bits 10..8 (byte[0] >> 2, masked to 3 bits) index into pages;
     - bits  7..5 (low 2 bits of byte[0], then bit 5 of byte[1]) are added
       to the page value shifted left by 3, selecting the bitmap word;
     - bits  4..0 (low 5 bits of byte[1]) select the bit inside that word.
   Yields a non-zero value when the selected bit is set.
*/
#define UTF8_GET_NAMING2(pages, byte)                                          \
  (namingBitmap[((pages)[((byte)[0] >> 2) & 0x07] << 3)                        \
                + (((byte)[0] & 0x03) << 1)                                    \
                + (((byte)[1] >> 5) & 0x01)]                                   \
   & (1u << ((byte)[1] & 0x1F)))
     91 
/* Bitmap lookup for a 3-byte UTF-8 sequence.  The character has 16 payload
   bits: the low 4 bits of byte[0], then the low 6 bits of byte[1], then the
   low 6 bits of byte[2].
     - bits 15..8 (low 4 bits of byte[0], then bits 5..2 of byte[1]) index
       into pages;
     - bits  7..5 (low 2 bits of byte[1], then bit 5 of byte[2]) are added
       to the page value shifted left by 3, selecting the bitmap word;
     - bits  4..0 (low 5 bits of byte[2]) select the bit inside that word.
   Yields a non-zero value when the selected bit is set.
*/
#define UTF8_GET_NAMING3(pages, byte)                                          \
  (namingBitmap[((pages)[(((byte)[0] & 0x0F) << 4)                             \
                         + (((byte)[1] >> 2) & 0x0F)]                          \
                 << 3)                                                         \
                + (((byte)[1] & 0x03) << 1)                                    \
                + (((byte)[2] >> 5) & 0x01)]                                   \
   & (1u << ((byte)[2] & 0x1F)))
    103 
    104 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
    105    of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/
    106    with the additional restriction of not allowing the Unicode
    107    code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
    108    Implementation details:
    109      (A & 0x80) == 0     means A < 0x80
    110    and
    111      (A & 0xC0) == 0xC0  means A > 0xBF
    112 */
    113 
/* Non-zero when the two bytes at p are not an acceptable 2-byte sequence:
   the lead byte is below 0xC2 (which rules out the overlong leads 0xC0 and
   0xC1), or the second byte is outside 0x80..0xBF.
   p must point to unsigned bytes.  The argument is parenthesized everywhere,
   including under the dereference, so expressions such as "buf + 1" work. */
#define UTF8_INVALID2(p)                                                       \
  ((*(p)) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
    116 
/* Non-zero when the three bytes at p are not an acceptable 3-byte sequence.
   Checks, in order:
     - third byte must be 0x80..0xBF, except that after EF BF it may not
       exceed 0xBD (this rejects U+FFFE and U+FFFF);
     - after lead 0xE0 the second byte must be 0xA0..0xBF (no overlongs);
     - after lead 0xED the second byte must be 0x80..0x9F (no surrogates);
     - otherwise the second byte must be 0x80..0xBF.
   p must point to unsigned bytes.  The argument is parenthesized everywhere,
   including under the dereference, so expressions such as "buf + 1" work. */
#define UTF8_INVALID3(p)                                                       \
  (((p)[2] & 0x80) == 0                                                        \
   || ((*(p)) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD                        \
                                        : ((p)[2] & 0xC0) == 0xC0)             \
   || ((*(p)) == 0xE0                                                          \
           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((*(p)) == 0xED ? (p)[1] > 0x9F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
    125 
/* Non-zero when the four bytes at p are not an acceptable 4-byte sequence.
   The third and fourth bytes must be 0x80..0xBF.  The second byte must be
   0x90..0xBF after lead 0xF0 (no overlongs), 0x80..0x8F after lead 0xF4
   (nothing above U+10FFFF) and 0x80..0xBF otherwise.
   p must point to unsigned bytes.  The argument is parenthesized everywhere,
   including under the dereference, so expressions such as "buf + 1" work. */
#define UTF8_INVALID4(p)                                                       \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0     \
   || ((p)[2] & 0xC0) == 0xC0                                                  \
   || ((*(p)) == 0xF0                                                          \
           ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((*(p)) == 0xF4 ? (p)[1] > 0x8F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
    133 
    134 static int PTRFASTCALL
    135 isNever(const ENCODING *enc, const char *p) {
    136   UNUSED_P(enc);
    137   UNUSED_P(p);
    138   return 0;
    139 }
    140 
    141 static int PTRFASTCALL
    142 utf8_isName2(const ENCODING *enc, const char *p) {
    143   UNUSED_P(enc);
    144   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
    145 }
    146 
    147 static int PTRFASTCALL
    148 utf8_isName3(const ENCODING *enc, const char *p) {
    149   UNUSED_P(enc);
    150   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
    151 }
    152 
    153 #define utf8_isName4 isNever
    154 
    155 static int PTRFASTCALL
    156 utf8_isNmstrt2(const ENCODING *enc, const char *p) {
    157   UNUSED_P(enc);
    158   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
    159 }
    160 
    161 static int PTRFASTCALL
    162 utf8_isNmstrt3(const ENCODING *enc, const char *p) {
    163   UNUSED_P(enc);
    164   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
    165 }
    166 
    167 #define utf8_isNmstrt4 isNever
    168 
    169 static int PTRFASTCALL
    170 utf8_isInvalid2(const ENCODING *enc, const char *p) {
    171   UNUSED_P(enc);
    172   return UTF8_INVALID2((const unsigned char *)p);
    173 }
    174 
    175 static int PTRFASTCALL
    176 utf8_isInvalid3(const ENCODING *enc, const char *p) {
    177   UNUSED_P(enc);
    178   return UTF8_INVALID3((const unsigned char *)p);
    179 }
    180 
    181 static int PTRFASTCALL
    182 utf8_isInvalid4(const ENCODING *enc, const char *p) {
    183   UNUSED_P(enc);
    184   return UTF8_INVALID4((const unsigned char *)p);
    185 }
    186 
    /* An ENCODING extended with a per-byte classification table and a set of
       function pointers.  The ENCODING is the first member, which is what
       lets AS_NORMAL_ENCODING() cast an ENCODING pointer to this type.
       Instances are built with positional initializers (see the *_VTABLE
       macros), so the member order below must not change. */
    187 struct normal_encoding {
    188   ENCODING enc;
      /* Byte type for each possible byte value; SB_BYTE_TYPE() indexes it
         with the byte converted to unsigned char. */
    189   unsigned char type[256];
      /* Present only in XML_MIN_SIZE builds, where the BYTE_TYPE,
         IS_NAME_CHAR_MINBPC, IS_NMSTRT_CHAR_MINBPC, BYTE_TO_ASCII and
         CHAR_MATCHES macros call through these pointers. */
    190 #ifdef XML_MIN_SIZE
    191   int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
    192   int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
    193   int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
    194   int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
    195   int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
    196 #endif /* XML_MIN_SIZE */
      /* Classifiers for 2-, 3- and 4-byte sequences, reached through
         IS_NAME_CHAR, IS_NMSTRT_CHAR and IS_INVALID_CHAR.  Tables built with
         NULL_VTABLE leave all nine of them NULL. */
    197   int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
    198   int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
    199   int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
    200   int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
    201   int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
    202   int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
    203   int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
    204   int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
    205   int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
    206 };
    207 
/* View an ENCODING pointer as a pointer to the struct normal_encoding that
   contains it. */
#define AS_NORMAL_ENCODING(enc)                                                \
  ((const struct normal_encoding *)(enc))
    209 
/* Initializers for the struct normal_encoding members that exist only in
   XML_MIN_SIZE builds: five E-prefixed names followed by a trailing comma.
   In all other builds the macro expands to nothing. */
#ifndef XML_MIN_SIZE

#  define STANDARD_VTABLE(E) /* empty */

#else

#  define STANDARD_VTABLE(E)                                                   \
    E##byteType,                                                               \
    E##isNameMin,                                                              \
    E##isNmstrtMin,                                                            \
    E##byteToAscii,                                                            \
    E##charMatches,

#endif
    220 
/* Initializers for the nine multi-byte classifier pointers of
   struct normal_encoding, in member order, each name prefixed with E. */
#define NORMAL_VTABLE(E)                                                       \
  E##isName2,                                                                  \
  E##isName3,                                                                  \
  E##isName4,                                                                  \
  E##isNmstrt2,                                                                \
  E##isNmstrt3,                                                                \
  E##isNmstrt4,                                                                \
  E##isInvalid2,                                                               \
  E##isInvalid3,                                                               \
  E##isInvalid4
    224 
/* Counterpart of NORMAL_VTABLE that sets all nine multi-byte classifier
   pointers to NULL. */
#define NULL_VTABLE                                                            \
  /* isName2 */ NULL,                                                          \
  /* isName3 */ NULL,                                                          \
  /* isName4 */ NULL,                                                          \
  /* isNmstrt2 */ NULL,                                                        \
  /* isNmstrt3 */ NULL,                                                        \
  /* isNmstrt4 */ NULL,                                                        \
  /* isInvalid2 */ NULL,                                                       \
  /* isInvalid3 */ NULL,                                                       \
  /* isInvalid4 */ NULL
    229 
    230 static int FASTCALL checkCharRefNumber(int result);
    231 
    232 #include "xmltok_impl.h"
    233 #include "ascii.h"
    234 
/* Single-byte ("sb_") settings used while xmltok_impl.c is included below.
   XML_MIN_SIZE builds alias the two *Min classifiers to isNever and read the
   minimum bytes per character from the encoding; other builds hard-code a
   minimum of 1 byte per character. */
#ifdef XML_MIN_SIZE
#  define sb_isNameMin isNever
#  define sb_isNmstrtMin isNever
#  define MINBPC(enc) ((enc)->minBytesPerChar)
#else
#  define MINBPC(enc) 1
#endif
    246 
/* Byte type of the single byte at p: the type[] entry of the enclosing
   struct normal_encoding, indexed by that byte taken as unsigned char. */
#define SB_BYTE_TYPE(enc, p)                                                   \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[0]])
    249 
/* BYTE_TYPE: a direct table lookup in regular builds; in XML_MIN_SIZE builds
   a call through the encoding's byteType pointer, for which sb_byteType is
   the table-lookup implementation. */
#ifndef XML_MIN_SIZE
#  define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#else
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p) {
  const int kind = SB_BYTE_TYPE(enc, p);
  return kind;
}
#  define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#endif
    259 
/* BYTE_TO_ASCII: the byte at p itself in regular builds; in XML_MIN_SIZE
   builds a call through the encoding's byteToAscii pointer, for which
   sb_byteToAscii simply returns *p. */
#ifndef XML_MIN_SIZE
#  define BYTE_TO_ASCII(enc, p) (*(p))
#else
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return p[0];
}
#  define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
#endif
    270 
/* Dispatch to the n-byte classifier stored in the enclosing
   struct normal_encoding (n is pasted onto the member name). */
#define IS_NAME_CHAR(enc, p, n)                                                \
  (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n)                                              \
  (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))

/* XML_MIN_SIZE builds first check that the isInvalid pointer is non-NULL and
   yield 0 when it is NULL; other builds call it unconditionally. */
#ifndef XML_MIN_SIZE
#  define IS_INVALID_CHAR(enc, p, n)                                           \
    (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
#else
#  define IS_INVALID_CHAR(enc, p, n)                                           \
    (AS_NORMAL_ENCODING(enc)->isInvalid##n                                     \
     && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
#endif
    281 
/* The *_MINBPC classifiers are the constant 0 in regular builds; in
   XML_MIN_SIZE builds they call the encoding's isNameMin / isNmstrtMin
   pointers. */
#ifndef XML_MIN_SIZE
#  define IS_NAME_CHAR_MINBPC(enc, p) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#else
#  define IS_NAME_CHAR_MINBPC(enc, p)                                          \
    (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#  define IS_NMSTRT_CHAR_MINBPC(enc, p)                                        \
    (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#endif
    291 
/* CHAR_MATCHES compares the byte at p with the ASCII character c: inline in
   regular builds, through the encoding's charMatches pointer in XML_MIN_SIZE
   builds (sb_charMatches being the plain byte comparison). */
#ifndef XML_MIN_SIZE
#  define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#else
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return p[0] == c;
}
#  define CHAR_MATCHES(enc, p, c)                                              \
    (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
#endif
    304 
    305 #define PREFIX(ident) normal_##ident
    306 #define XML_TOK_IMPL_C
    307 #include "xmltok_impl.c"
    308 #undef XML_TOK_IMPL_C
    309 
    310 #undef MINBPC
    311 #undef BYTE_TYPE
    312 #undef BYTE_TO_ASCII
    313 #undef CHAR_MATCHES
    314 #undef IS_NAME_CHAR
    315 #undef IS_NAME_CHAR_MINBPC
    316 #undef IS_NMSTRT_CHAR
    317 #undef IS_NMSTRT_CHAR_MINBPC
    318 #undef IS_INVALID_CHAR
    319 
/* UTF8_cvalN is the marker pattern carried in the high bits of the first
   byte of an N-byte UTF-8 sequence. */
enum {
  UTF8_cval1 = 0x00, /* 0xxxxxxx */
  UTF8_cval2 = 0xC0, /* 110xxxxx */
  UTF8_cval3 = 0xE0, /* 1110xxxx */
  UTF8_cval4 = 0xF0  /* 11110xxx */
};
    326 
/* Classify one byte by the UTF-8 sequence length it announces.
   Returns 4, 3 or 2 for the lead bytes 11110xxx, 1110xxxx and 110xxxxx,
   1 for a single-byte character (0xxxxxxx) and 0 for anything else
   (continuation bytes 10xxxxxx and the patterns 11111xxx). */
static size_t
utf8_sequence_length_from_lead(unsigned char byte) {
  if ((byte & 0xf8u) == 0xf0u)
    return 4;
  if ((byte & 0xf0u) == 0xe0u)
    return 3;
  if ((byte & 0xe0u) == 0xc0u)
    return 2;
  if ((byte & 0x80u) == 0x00u)
    return 1;
  return 0;
}

/* Move *fromLimRef backwards so that the range [from, *fromLimRef) does not
   end in the middle of a multi-byte UTF-8 character.

   The range is scanned from its end towards from:
     - a single-byte character ends the scan with the limit just after it;
     - a lead byte that has at least the announced number of bytes behind it
       (itself included) ends the scan with the limit just after that
       character;
     - a lead byte with fewer bytes behind it is dropped together with those
       bytes, and scanning continues in front of it;
     - any other byte is stepped over.
   If the scan reaches from without finding a complete character, the limit
   becomes from.

   from        start of the byte range
   fromLimRef  in: one past the last byte; out: the trimmed limit (never
               larger than the value passed in for well-formed input) */
void
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
                                           const char **fromLimRef) {
  const char *fromLim = *fromLimRef;
  size_t walked = 0; /* bytes stepped over since the last reset */

  for (; fromLim > from; fromLim--, walked++) {
    const size_t seqLen
        = utf8_sequence_length_from_lead((unsigned char)fromLim[-1]);

    if (seqLen == 0)
      continue; /* not a lead byte: keep walking backwards */

    if (seqLen == 1)
      break; /* limit already sits right after a complete character */

    if (walked + 1 >= seqLen) {
      /* fromLim points just past the lead byte; skip its trailing bytes */
      fromLim += seqLen - 1;
      break;
    }

    /* Truncated character: the loop step moves the limit in front of it. */
    walked = 0;
  }
  *fromLimRef = fromLim;
}
    365 
    366 static enum XML_Convert_Result PTRCALL
    367 utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
    368             char **toP, const char *toLim) {
    369   bool input_incomplete = false;
    370   bool output_exhausted = false;
    371 
    372   /* Avoid copying partial characters (due to limited space). */
    373   const ptrdiff_t bytesAvailable = fromLim - *fromP;
    374   const ptrdiff_t bytesStorable = toLim - *toP;
    375   UNUSED_P(enc);
    376   if (bytesAvailable > bytesStorable) {
    377     fromLim = *fromP + bytesStorable;
    378     output_exhausted = true;
    379   }
    380 
    381   /* Avoid copying partial characters (from incomplete input). */
    382   {
    383     const char *const fromLimBefore = fromLim;
    384     _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
    385     if (fromLim < fromLimBefore) {
    386       input_incomplete = true;
    387     }
    388   }
    389 
    390   {
    391     const ptrdiff_t bytesToCopy = fromLim - *fromP;
    392     memcpy(*toP, *fromP, bytesToCopy);
    393     *fromP += bytesToCopy;
    394     *toP += bytesToCopy;
    395   }
    396 
    397   if (output_exhausted) /* needs to go first */
    398     return XML_CONVERT_OUTPUT_EXHAUSTED;
    399   else if (input_incomplete)
    400     return XML_CONVERT_INPUT_INCOMPLETE;
    401   else
    402     return XML_CONVERT_COMPLETED;
    403 }
    404 
    405 static enum XML_Convert_Result PTRCALL
    406 utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
    407              unsigned short **toP, const unsigned short *toLim) {
    408   enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
    409   unsigned short *to = *toP;
    410   const char *from = *fromP;
    411   while (from < fromLim && to < toLim) {
    412     switch (SB_BYTE_TYPE(enc, from)) {
    413     case BT_LEAD2:
    414       if (fromLim - from < 2) {
    415         res = XML_CONVERT_INPUT_INCOMPLETE;
    416         goto after;
    417       }
    418       *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
    419       from += 2;
    420       break;
    421     case BT_LEAD3:
    422       if (fromLim - from < 3) {
    423         res = XML_CONVERT_INPUT_INCOMPLETE;
    424         goto after;
    425       }
    426       *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
    427                                | (from[2] & 0x3f));
    428       from += 3;
    429       break;
    430     case BT_LEAD4: {
    431       unsigned long n;
    432       if (toLim - to < 2) {
    433         res = XML_CONVERT_OUTPUT_EXHAUSTED;
    434         goto after;
    435       }
    436       if (fromLim - from < 4) {
    437         res = XML_CONVERT_INPUT_INCOMPLETE;
    438         goto after;
    439       }
    440       n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
    441           | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
    442       n -= 0x10000;
    443       to[0] = (unsigned short)((n >> 10) | 0xD800);
    444       to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
    445       to += 2;
    446       from += 4;
    447     } break;
    448     default:
    449       *to++ = *from++;
    450       break;
    451     }
    452   }
    453   if (from < fromLim)
    454     res = XML_CONVERT_OUTPUT_EXHAUSTED;
    455 after:
    456   *fromP = from;
    457   *toP = to;
    458   return res;
    459 }
    460 
    461 #ifdef XML_NS
    /* UTF-8 encoding descriptor, compiled only when XML_NS is defined.
       Converters are utf8_toUtf8 / utf8_toUtf16; the byte-type table is
       assembled from asciitab.h followed by utf8tab.h; the multi-byte
       classifiers are the utf8_* functions (NORMAL_VTABLE(utf8_)).
       Unlike utf8_encoding, BT_COLON is not redefined to BT_NMSTRT while
       asciitab.h is included.
       NOTE(review): the trailing "1, 1, 0" initialize ENCODING members whose
       declaration is not visible in this file section. */
    462 static const struct normal_encoding utf8_encoding_ns
    463     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
    464        {
    465 #  include "asciitab.h"
    466 #  include "utf8tab.h"
    467        },
    468        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
    469 #endif
    470 
    /* UTF-8 encoding descriptor.  Converters are utf8_toUtf8 / utf8_toUtf16;
       the byte-type table is assembled from asciitab.h followed by
       utf8tab.h; the multi-byte classifiers are the utf8_* functions
       (NORMAL_VTABLE(utf8_)).
       BT_COLON is defined as BT_NMSTRT while asciitab.h is included, so any
       table entry written as BT_COLON gets the BT_NMSTRT value here.
       NOTE(review): the trailing "1, 1, 0" initialize ENCODING members whose
       declaration is not visible in this file section. */
    471 static const struct normal_encoding utf8_encoding
    472     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
    473        {
    474 #define BT_COLON BT_NMSTRT
    475 #include "asciitab.h"
    476 #undef BT_COLON
    477 #include "utf8tab.h"
    478        },
    479        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
    480 
    481 #ifdef XML_NS
    482 
    /* Second UTF-8 descriptor for XML_NS builds.  It matches
       utf8_encoding_ns except that the first part of the byte-type table
       comes from iasciitab.h instead of asciitab.h.
       NOTE(review): how iasciitab.h differs from asciitab.h is not visible
       here; the "internal" in the name suggests it serves the parser's
       internal encoding -- confirm against the users of this table. */
    483 static const struct normal_encoding internal_utf8_encoding_ns
    484     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
    485        {
    486 #  include "iasciitab.h"
    487 #  include "utf8tab.h"
    488        },
    489        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
    490 
    491 #endif
    492 
    /* Second UTF-8 descriptor.  It matches utf8_encoding (including the
       BT_COLON -> BT_NMSTRT substitution during the include) except that the
       first part of the byte-type table comes from iasciitab.h instead of
       asciitab.h.
       NOTE(review): how iasciitab.h differs from asciitab.h is not visible
       here; the "internal" in the name suggests it serves the parser's
       internal encoding -- confirm against the users of this table. */
    493 static const struct normal_encoding internal_utf8_encoding
    494     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
    495        {
    496 #define BT_COLON BT_NMSTRT
    497 #include "iasciitab.h"
    498 #undef BT_COLON
    499 #include "utf8tab.h"
    500        },
    501        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
    502 
    503 static enum XML_Convert_Result PTRCALL
    504 latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
    505               char **toP, const char *toLim) {
    506   UNUSED_P(enc);
    507   for (;;) {
    508     unsigned char c;
    509     if (*fromP == fromLim)
    510       return XML_CONVERT_COMPLETED;
    511     c = (unsigned char)**fromP;
    512     if (c & 0x80) {
    513       if (toLim - *toP < 2)
    514         return XML_CONVERT_OUTPUT_EXHAUSTED;
    515       *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
    516       *(*toP)++ = (char)((c & 0x3f) | 0x80);
    517       (*fromP)++;
    518     } else {
    519       if (*toP == toLim)
    520         return XML_CONVERT_OUTPUT_EXHAUSTED;
    521       *(*toP)++ = *(*fromP)++;
    522     }
    523   }
    524 }
    525 
    526 static enum XML_Convert_Result PTRCALL
    527 latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
    528                unsigned short **toP, const unsigned short *toLim) {
    529   UNUSED_P(enc);
    530   while (*fromP < fromLim && *toP < toLim)
    531     *(*toP)++ = (unsigned char)*(*fromP)++;
    532 
    533   if ((*toP == toLim) && (*fromP < fromLim))
    534     return XML_CONVERT_OUTPUT_EXHAUSTED;
    535   else
    536     return XML_CONVERT_COMPLETED;
    537 }
    538 
    539 #ifdef XML_NS
    540 
    /* Latin-1 encoding descriptor, compiled only when XML_NS is defined.
       Converters are latin1_toUtf8 / latin1_toUtf16; the byte-type table is
       assembled from asciitab.h followed by latin1tab.h; all multi-byte
       classifier pointers are NULL (NULL_VTABLE).
       Unlike latin1_encoding, BT_COLON is not redefined to BT_NMSTRT while
       asciitab.h is included.
       NOTE(review): the trailing "1, 0, 0" initialize ENCODING members whose
       declaration is not visible in this file section. */
    541 static const struct normal_encoding latin1_encoding_ns
    542     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
    543        {
    544 #  include "asciitab.h"
    545 #  include "latin1tab.h"
    546        },
    547        STANDARD_VTABLE(sb_) NULL_VTABLE};
    548 
    549 #endif
    550 
    /* Latin-1 encoding descriptor.  Converters are latin1_toUtf8 /
       latin1_toUtf16; the byte-type table is assembled from asciitab.h
       followed by latin1tab.h; all multi-byte classifier pointers are NULL
       (NULL_VTABLE).
       BT_COLON is defined as BT_NMSTRT while asciitab.h is included, so any
       table entry written as BT_COLON gets the BT_NMSTRT value here.
       NOTE(review): the trailing "1, 0, 0" initialize ENCODING members whose
       declaration is not visible in this file section. */
    551 static const struct normal_encoding latin1_encoding
    552     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
    553        {
    554 #define BT_COLON BT_NMSTRT
    555 #include "asciitab.h"
    556 #undef BT_COLON
    557 #include "latin1tab.h"
    558        },
    559        STANDARD_VTABLE(sb_) NULL_VTABLE};
    560 
    561 static enum XML_Convert_Result PTRCALL
    562 ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
    563              char **toP, const char *toLim) {
    564   UNUSED_P(enc);
    565   while (*fromP < fromLim && *toP < toLim)
    566     *(*toP)++ = *(*fromP)++;
    567 
    568   if ((*toP == toLim) && (*fromP < fromLim))
    569     return XML_CONVERT_OUTPUT_EXHAUSTED;
    570   else
    571     return XML_CONVERT_COMPLETED;
    572 }
    573 
    574 #ifdef XML_NS
    575 
    /* ASCII encoding descriptor, compiled only when XML_NS is defined.
       Converters are ascii_toUtf8 and (shared with Latin-1) latin1_toUtf16.
       Only asciitab.h contributes to the byte-type table; the remaining
       entries are zero-initialized, which the comment inside the table notes
       is BT_NONXML.  All multi-byte classifier pointers are NULL.
       Unlike ascii_encoding, BT_COLON is not redefined to BT_NMSTRT while
       asciitab.h is included.
       NOTE(review): the trailing "1, 1, 0" initialize ENCODING members whose
       declaration is not visible in this file section. */
    576 static const struct normal_encoding ascii_encoding_ns
    577     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
    578        {
    579 #  include "asciitab.h"
    580            /* BT_NONXML == 0 */
    581        },
    582        STANDARD_VTABLE(sb_) NULL_VTABLE};
    583 
    584 #endif
    585 
    /* ASCII encoding descriptor.  Converters are ascii_toUtf8 and (shared
       with Latin-1) latin1_toUtf16.  Only asciitab.h contributes to the
       byte-type table; the remaining entries are zero-initialized, which the
       comment inside the table notes is BT_NONXML.  All multi-byte
       classifier pointers are NULL.
       BT_COLON is defined as BT_NMSTRT while asciitab.h is included, so any
       table entry written as BT_COLON gets the BT_NMSTRT value here.
       NOTE(review): the trailing "1, 1, 0" initialize ENCODING members whose
       declaration is not visible in this file section. */
    586 static const struct normal_encoding ascii_encoding
    587     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
    588        {
    589 #define BT_COLON BT_NMSTRT
    590 #include "asciitab.h"
    591 #undef BT_COLON
    592            /* BT_NONXML == 0 */
    593        },
    594        STANDARD_VTABLE(sb_) NULL_VTABLE};
    595 
    596 static int PTRFASTCALL
    597 unicode_byte_type(char hi, char lo) {
    598   switch ((unsigned char)hi) {
    599   /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
    600   case 0xD8:
    601   case 0xD9:
    602   case 0xDA:
    603   case 0xDB:
    604     return BT_LEAD4;
    605   /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
    606   case 0xDC:
    607   case 0xDD:
    608   case 0xDE:
    609   case 0xDF:
    610     return BT_TRAIL;
    611   case 0xFF:
    612     switch ((unsigned char)lo) {
    613     case 0xFF: /* noncharacter-FFFF */
    614     case 0xFE: /* noncharacter-FFFE */
    615       return BT_NONXML;
    616     }
    617     break;
    618   }
    619   return BT_NONASCII;
    620 }
    621 
/* Defines the function E##toUtf8, which converts 16-bit units read with
   GET_LO / GET_HI from *fromP into UTF-8 at *toP.

   The input limit is first rounded down to a whole number of units.  Units
   below 0x80 produce one byte, units with a high byte of 0x00..0x07 two
   bytes, units with a high byte of 0xD8..0xDB are combined with the
   following unit into four bytes, and all remaining units produce three
   bytes.

   The generated function returns XML_CONVERT_OUTPUT_EXHAUSTED when the next
   character does not fit before toLim, XML_CONVERT_INPUT_INCOMPLETE when the
   second unit of a pair is missing, and XML_CONVERT_COMPLETED otherwise.
   *fromP is set to the first unconverted unit in every case. */
#define DEFINE_UTF16_TO_UTF8(E)                                                \
  static enum XML_Convert_Result PTRCALL E##toUtf8(                            \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      char **toP, const char *toLim) {                                         \
    const char *from = *fromP;                                                 \
    UNUSED_P(enc);                                                             \
    /* ignore a dangling odd byte at the end of the input */                   \
    fromLim = from + (((fromLim - from) >> 1) << 1);                           \
    for (; from < fromLim; from += 2) {                                        \
      const unsigned char lo = GET_LO(from);                                   \
      const unsigned char hi = GET_HI(from);                                   \
      if (hi == 0 && lo < 0x80) {                                              \
        /* one byte */                                                         \
        if (*toP == toLim) {                                                   \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        *(*toP)++ = lo;                                                        \
      } else if (hi <= 0x7) {                                                  \
        /* two bytes */                                                        \
        if (toLim - *toP < 2) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2);                      \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
      } else if (hi >= 0xD8 && hi <= 0xDB) {                                   \
        /* first unit of a pair: four bytes, consumes two units */             \
        int plane;                                                             \
        unsigned char lo2;                                                     \
        if (toLim - *toP < 4) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        if (fromLim - from < 4) {                                              \
          *fromP = from;                                                       \
          return XML_CONVERT_INPUT_INCOMPLETE;                                 \
        }                                                                      \
        plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1;                   \
        *(*toP)++ = (char)((plane >> 2) | UTF8_cval4);                         \
        *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80);         \
        from += 2;                                                             \
        lo2 = GET_LO(from);                                                    \
        *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2)           \
                     | (lo2 >> 6) | 0x80);                                     \
        *(*toP)++ = ((lo2 & 0x3f) | 0x80);                                     \
      } else {                                                                 \
        /* three bytes: 16 bits divided 4, 6, 6 */                             \
        if (toLim - *toP < 3) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        *(*toP)++ = ((hi >> 4) | UTF8_cval3);                                  \
        *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80);                    \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
      }                                                                        \
    }                                                                          \
    *fromP = from;                                                             \
    return (from < fromLim) ? XML_CONVERT_INPUT_INCOMPLETE                     \
                            : XML_CONVERT_COMPLETED;                           \
  }
    698 
/* Generates E##toUtf16, which copies 2-byte input code units into an array
   of unsigned short.  The byte order is whatever GET_HI / GET_LO select at
   the point where the macro is expanded.

   *fromP and *toP are advanced past what was consumed / produced.  Result:
     XML_CONVERT_OUTPUT_EXHAUSTED  - output is full and input remains;
     XML_CONVERT_INPUT_INCOMPLETE  - a final surrogate-range unit was held
                                     back (and output was not exhausted);
     XML_CONVERT_COMPLETED         - otherwise.
*/
#define DEFINE_UTF16_TO_UTF16(E)                                               \
  static enum XML_Convert_Result PTRCALL E##toUtf16(                           \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      unsigned short **toP, const unsigned short *toLim) {                     \
    enum XML_Convert_Result status = XML_CONVERT_COMPLETED;                    \
    UNUSED_P(enc);                                                             \
    /* Drop a trailing odd byte: only whole 2-byte units are converted. */     \
    fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1);                       \
    /* When the input will not fit and its last unit has a high byte in */     \
    /* 0xD8..0xDF (surrogate range), leave that unit unconverted. */           \
    if (fromLim - *fromP > ((toLim - *toP) << 1)                               \
        && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) {                             \
      fromLim -= 2;                                                            \
      status = XML_CONVERT_INPUT_INCOMPLETE;                                   \
    }                                                                          \
    while (*fromP < fromLim && *toP < toLim) {                                 \
      *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP);                      \
      *fromP += 2;                                                             \
    }                                                                          \
    if ((*toP == toLim) && (*fromP < fromLim))                                 \
      return XML_CONVERT_OUTPUT_EXHAUSTED;                                     \
    return status;                                                             \
  }
    719 
    720 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
    721 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
    722 
    723 DEFINE_UTF16_TO_UTF8(little2_)
    724 DEFINE_UTF16_TO_UTF16(little2_)
    725 
    726 #undef GET_LO
    727 #undef GET_HI
    728 
    729 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
    730 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
    731 
    732 DEFINE_UTF16_TO_UTF8(big2_)
    733 DEFINE_UTF16_TO_UTF16(big2_)
    734 
    735 #undef GET_LO
    736 #undef GET_HI
    737 
/* Per-character primitives for 2-byte little-endian input.  In every macro
   `p` must point at two readable bytes: (p)[0] is the low-order byte and
   (p)[1] the high-order byte of one code unit.

   Every use of `p` is parenthesized so that an expression argument such as
   `ptr + 2` expands correctly.
*/

/* Byte type of the unit at p: the single-byte table lookup when the high
   byte is zero, unicode_byte_type(high, low) otherwise. */
#define LITTLE2_BYTE_TYPE(enc, p)                                              \
  ((p)[1] == 0 ? SB_BYTE_TYPE(enc, (p)) : unicode_byte_type((p)[1], (p)[0]))

/* The low byte when the high byte is zero, else -1. */
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)

/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))

/* Name-character test via the namePages table (high byte, low byte). */
#define LITTLE2_IS_NAME_CHAR_MINBPC(p)                                         \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])

/* Name-start-character test via the nmstrtPages table. */
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)                                       \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
    746 
    747 #ifdef XML_MIN_SIZE
    748 
    749 static int PTRFASTCALL
    750 little2_byteType(const ENCODING *enc, const char *p) {
    751   return LITTLE2_BYTE_TYPE(enc, p);
    752 }
    753 
    754 static int PTRFASTCALL
    755 little2_byteToAscii(const ENCODING *enc, const char *p) {
    756   UNUSED_P(enc);
    757   return LITTLE2_BYTE_TO_ASCII(p);
    758 }
    759 
    760 static int PTRCALL
    761 little2_charMatches(const ENCODING *enc, const char *p, int c) {
    762   UNUSED_P(enc);
    763   return LITTLE2_CHAR_MATCHES(p, c);
    764 }
    765 
    766 static int PTRFASTCALL
    767 little2_isNameMin(const ENCODING *enc, const char *p) {
    768   UNUSED_P(enc);
    769   return LITTLE2_IS_NAME_CHAR_MINBPC(p);
    770 }
    771 
    772 static int PTRFASTCALL
    773 little2_isNmstrtMin(const ENCODING *enc, const char *p) {
    774   UNUSED_P(enc);
    775   return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p);
    776 }
    777 
    778 #  undef VTABLE
    779 #  define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
    780 
    781 #else /* not XML_MIN_SIZE */
    782 
    783 #  undef PREFIX
    784 #  define PREFIX(ident) little2_##ident
    785 #  define MINBPC(enc) 2
    786 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
    787 #  define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
    788 #  define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
    789 #  define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
    790 #  define IS_NAME_CHAR(enc, p, n) 0
    791 #  define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
    792 #  define IS_NMSTRT_CHAR(enc, p, n) (0)
    793 #  define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)
    794 
    795 #  define XML_TOK_IMPL_C
    796 #  include "xmltok_impl.c"
    797 #  undef XML_TOK_IMPL_C
    798 
    799 #  undef MINBPC
    800 #  undef BYTE_TYPE
    801 #  undef BYTE_TO_ASCII
    802 #  undef CHAR_MATCHES
    803 #  undef IS_NAME_CHAR
    804 #  undef IS_NAME_CHAR_MINBPC
    805 #  undef IS_NMSTRT_CHAR
    806 #  undef IS_NMSTRT_CHAR_MINBPC
    807 #  undef IS_INVALID_CHAR
    808 
    809 #endif /* not XML_MIN_SIZE */
    810 
    811 #ifdef XML_NS
    812 
    813 static const struct normal_encoding little2_encoding_ns
    814     = {{VTABLE, 2, 0,
    815 #  if BYTEORDER == 1234
    816         1
    817 #  else
    818         0
    819 #  endif
    820        },
    821        {
    822 #  include "asciitab.h"
    823 #  include "latin1tab.h"
    824        },
    825        STANDARD_VTABLE(little2_) NULL_VTABLE};
    826 
    827 #endif
    828 
    829 static const struct normal_encoding little2_encoding
    830     = {{VTABLE, 2, 0,
    831 #if BYTEORDER == 1234
    832         1
    833 #else
    834         0
    835 #endif
    836        },
    837        {
    838 #define BT_COLON BT_NMSTRT
    839 #include "asciitab.h"
    840 #undef BT_COLON
    841 #include "latin1tab.h"
    842        },
    843        STANDARD_VTABLE(little2_) NULL_VTABLE};
    844 
    845 #if BYTEORDER != 4321
    846 
    847 #  ifdef XML_NS
    848 
    849 static const struct normal_encoding internal_little2_encoding_ns
    850     = {{VTABLE, 2, 0, 1},
    851        {
    852 #    include "iasciitab.h"
    853 #    include "latin1tab.h"
    854        },
    855        STANDARD_VTABLE(little2_) NULL_VTABLE};
    856 
    857 #  endif
    858 
    859 static const struct normal_encoding internal_little2_encoding
    860     = {{VTABLE, 2, 0, 1},
    861        {
    862 #  define BT_COLON BT_NMSTRT
    863 #  include "iasciitab.h"
    864 #  undef BT_COLON
    865 #  include "latin1tab.h"
    866        },
    867        STANDARD_VTABLE(little2_) NULL_VTABLE};
    868 
    869 #endif
    870 
/* Per-character primitives for 2-byte big-endian input.  In every macro
   `p` must point at two readable bytes: (p)[0] is the high-order byte and
   (p)[1] the low-order byte of one code unit.

   Every use of `p` is parenthesized so that an expression argument such as
   `ptr + 2` expands correctly.
*/

/* Byte type of the unit at p: the single-byte table lookup on the low byte
   when the high byte is zero, unicode_byte_type(high, low) otherwise. */
#define BIG2_BYTE_TYPE(enc, p)                                                 \
  ((p)[0] == 0 ? SB_BYTE_TYPE(enc, (p) + 1)                                    \
               : unicode_byte_type((p)[0], (p)[1]))

/* The low byte when the high byte is zero, else -1. */
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)

/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))

/* Name-character test via the namePages table (high byte, low byte). */
#define BIG2_IS_NAME_CHAR_MINBPC(p)                                            \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])

/* Name-start-character test via the nmstrtPages table. */
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p)                                          \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
    879 
    880 #ifdef XML_MIN_SIZE
    881 
    882 static int PTRFASTCALL
    883 big2_byteType(const ENCODING *enc, const char *p) {
    884   return BIG2_BYTE_TYPE(enc, p);
    885 }
    886 
    887 static int PTRFASTCALL
    888 big2_byteToAscii(const ENCODING *enc, const char *p) {
    889   UNUSED_P(enc);
    890   return BIG2_BYTE_TO_ASCII(p);
    891 }
    892 
    893 static int PTRCALL
    894 big2_charMatches(const ENCODING *enc, const char *p, int c) {
    895   UNUSED_P(enc);
    896   return BIG2_CHAR_MATCHES(p, c);
    897 }
    898 
    899 static int PTRFASTCALL
    900 big2_isNameMin(const ENCODING *enc, const char *p) {
    901   UNUSED_P(enc);
    902   return BIG2_IS_NAME_CHAR_MINBPC(p);
    903 }
    904 
    905 static int PTRFASTCALL
    906 big2_isNmstrtMin(const ENCODING *enc, const char *p) {
    907   UNUSED_P(enc);
    908   return BIG2_IS_NMSTRT_CHAR_MINBPC(p);
    909 }
    910 
    911 #  undef VTABLE
    912 #  define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
    913 
    914 #else /* not XML_MIN_SIZE */
    915 
    916 #  undef PREFIX
    917 #  define PREFIX(ident) big2_##ident
    918 #  define MINBPC(enc) 2
    919 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
    920 #  define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
    921 #  define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
    922 #  define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
    923 #  define IS_NAME_CHAR(enc, p, n) 0
    924 #  define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
    925 #  define IS_NMSTRT_CHAR(enc, p, n) (0)
    926 #  define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)
    927 
    928 #  define XML_TOK_IMPL_C
    929 #  include "xmltok_impl.c"
    930 #  undef XML_TOK_IMPL_C
    931 
    932 #  undef MINBPC
    933 #  undef BYTE_TYPE
    934 #  undef BYTE_TO_ASCII
    935 #  undef CHAR_MATCHES
    936 #  undef IS_NAME_CHAR
    937 #  undef IS_NAME_CHAR_MINBPC
    938 #  undef IS_NMSTRT_CHAR
    939 #  undef IS_NMSTRT_CHAR_MINBPC
    940 #  undef IS_INVALID_CHAR
    941 
    942 #endif /* not XML_MIN_SIZE */
    943 
    944 #ifdef XML_NS
    945 
    946 static const struct normal_encoding big2_encoding_ns
    947     = {{VTABLE, 2, 0,
    948 #  if BYTEORDER == 4321
    949         1
    950 #  else
    951         0
    952 #  endif
    953        },
    954        {
    955 #  include "asciitab.h"
    956 #  include "latin1tab.h"
    957        },
    958        STANDARD_VTABLE(big2_) NULL_VTABLE};
    959 
    960 #endif
    961 
    962 static const struct normal_encoding big2_encoding
    963     = {{VTABLE, 2, 0,
    964 #if BYTEORDER == 4321
    965         1
    966 #else
    967         0
    968 #endif
    969        },
    970        {
    971 #define BT_COLON BT_NMSTRT
    972 #include "asciitab.h"
    973 #undef BT_COLON
    974 #include "latin1tab.h"
    975        },
    976        STANDARD_VTABLE(big2_) NULL_VTABLE};
    977 
    978 #if BYTEORDER != 1234
    979 
    980 #  ifdef XML_NS
    981 
    982 static const struct normal_encoding internal_big2_encoding_ns
    983     = {{VTABLE, 2, 0, 1},
    984        {
    985 #    include "iasciitab.h"
    986 #    include "latin1tab.h"
    987        },
    988        STANDARD_VTABLE(big2_) NULL_VTABLE};
    989 
    990 #  endif
    991 
    992 static const struct normal_encoding internal_big2_encoding
    993     = {{VTABLE, 2, 0, 1},
    994        {
    995 #  define BT_COLON BT_NMSTRT
    996 #  include "iasciitab.h"
    997 #  undef BT_COLON
    998 #  include "latin1tab.h"
    999        },
   1000        STANDARD_VTABLE(big2_) NULL_VTABLE};
   1001 
   1002 #endif
   1003 
   1004 #undef PREFIX
   1005 
   1006 static int FASTCALL
   1007 streqci(const char *s1, const char *s2) {
   1008   for (;;) {
   1009     char c1 = *s1++;
   1010     char c2 = *s2++;
   1011     if (ASCII_a <= c1 && c1 <= ASCII_z)
   1012       c1 += ASCII_A - ASCII_a;
   1013     if (ASCII_a <= c2 && c2 <= ASCII_z)
   1014       /* The following line will never get executed.  streqci() is
   1015        * only called from two places, both of which guarantee to put
   1016        * upper-case strings into s2.
   1017        */
   1018       c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
   1019     if (c1 != c2)
   1020       return 0;
   1021     if (! c1)
   1022       break;
   1023   }
   1024   return 1;
   1025 }
   1026 
   1027 static void PTRCALL
   1028 initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end,
   1029                    POSITION *pos) {
   1030   UNUSED_P(enc);
   1031   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
   1032 }
   1033 
   1034 static int
   1035 toAscii(const ENCODING *enc, const char *ptr, const char *end) {
   1036   char buf[1];
   1037   char *p = buf;
   1038   XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
   1039   if (p == buf)
   1040     return -1;
   1041   else
   1042     return buf[0];
   1043 }
   1044 
   1045 static int FASTCALL
   1046 isSpace(int c) {
   1047   switch (c) {
   1048   case 0x20:
   1049   case 0xD:
   1050   case 0xA:
   1051   case 0x9:
   1052     return 1;
   1053   }
   1054   return 0;
   1055 }
   1056 
   1057 /* Return 1 if there's just optional white space or there's an S
   1058    followed by name=val.
   1059 */
   1060 static int
   1061 parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end,
   1062                      const char **namePtr, const char **nameEndPtr,
   1063                      const char **valPtr, const char **nextTokPtr) {
   1064   int c;
   1065   char open;
   1066   if (ptr == end) {
   1067     *namePtr = NULL;
   1068     return 1;
   1069   }
   1070   if (! isSpace(toAscii(enc, ptr, end))) {
   1071     *nextTokPtr = ptr;
   1072     return 0;
   1073   }
   1074   do {
   1075     ptr += enc->minBytesPerChar;
   1076   } while (isSpace(toAscii(enc, ptr, end)));
   1077   if (ptr == end) {
   1078     *namePtr = NULL;
   1079     return 1;
   1080   }
   1081   *namePtr = ptr;
   1082   for (;;) {
   1083     c = toAscii(enc, ptr, end);
   1084     if (c == -1) {
   1085       *nextTokPtr = ptr;
   1086       return 0;
   1087     }
   1088     if (c == ASCII_EQUALS) {
   1089       *nameEndPtr = ptr;
   1090       break;
   1091     }
   1092     if (isSpace(c)) {
   1093       *nameEndPtr = ptr;
   1094       do {
   1095         ptr += enc->minBytesPerChar;
   1096       } while (isSpace(c = toAscii(enc, ptr, end)));
   1097       if (c != ASCII_EQUALS) {
   1098         *nextTokPtr = ptr;
   1099         return 0;
   1100       }
   1101       break;
   1102     }
   1103     ptr += enc->minBytesPerChar;
   1104   }
   1105   if (ptr == *namePtr) {
   1106     *nextTokPtr = ptr;
   1107     return 0;
   1108   }
   1109   ptr += enc->minBytesPerChar;
   1110   c = toAscii(enc, ptr, end);
   1111   while (isSpace(c)) {
   1112     ptr += enc->minBytesPerChar;
   1113     c = toAscii(enc, ptr, end);
   1114   }
   1115   if (c != ASCII_QUOT && c != ASCII_APOS) {
   1116     *nextTokPtr = ptr;
   1117     return 0;
   1118   }
   1119   open = (char)c;
   1120   ptr += enc->minBytesPerChar;
   1121   *valPtr = ptr;
   1122   for (;; ptr += enc->minBytesPerChar) {
   1123     c = toAscii(enc, ptr, end);
   1124     if (c == open)
   1125       break;
   1126     if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)
   1127         && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD
   1128         && c != ASCII_MINUS && c != ASCII_UNDERSCORE) {
   1129       *nextTokPtr = ptr;
   1130       return 0;
   1131     }
   1132   }
   1133   *nextTokPtr = ptr + enc->minBytesPerChar;
   1134   return 1;
   1135 }
   1136 
/* Keywords used by doParseXmlDecl(), spelled out character by character
   with the ASCII_* constants (rather than as string literals) and
   explicitly NUL-terminated. */

/* "version" - name of the version pseudo-attribute. */
static const char KW_version[]
    = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};

/* "encoding" - name of the encoding pseudo-attribute. */
static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d,
                                   ASCII_i, ASCII_n, ASCII_g, '\0'};

/* "standalone" - name of the standalone pseudo-attribute. */
static const char KW_standalone[]
    = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a,
       ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};

/* "yes" - standalone value that sets *standalone to 1. */
static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'};

/* "no" - standalone value that sets *standalone to 0. */
static const char KW_no[] = {ASCII_n, ASCII_o, '\0'};
   1150 
   1151 static int
   1152 doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *,
   1153                                                  const char *),
   1154                int isGeneralTextEntity, const ENCODING *enc, const char *ptr,
   1155                const char *end, const char **badPtr, const char **versionPtr,
   1156                const char **versionEndPtr, const char **encodingName,
   1157                const ENCODING **encoding, int *standalone) {
   1158   const char *val = NULL;
   1159   const char *name = NULL;
   1160   const char *nameEnd = NULL;
   1161   ptr += 5 * enc->minBytesPerChar;
   1162   end -= 2 * enc->minBytesPerChar;
   1163   if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
   1164       || ! name) {
   1165     *badPtr = ptr;
   1166     return 0;
   1167   }
   1168   if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
   1169     if (! isGeneralTextEntity) {
   1170       *badPtr = name;
   1171       return 0;
   1172     }
   1173   } else {
   1174     if (versionPtr)
   1175       *versionPtr = val;
   1176     if (versionEndPtr)
   1177       *versionEndPtr = ptr;
   1178     if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
   1179       *badPtr = ptr;
   1180       return 0;
   1181     }
   1182     if (! name) {
   1183       if (isGeneralTextEntity) {
   1184         /* a TextDecl must have an EncodingDecl */
   1185         *badPtr = ptr;
   1186         return 0;
   1187       }
   1188       return 1;
   1189     }
   1190   }
   1191   if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
   1192     int c = toAscii(enc, val, end);
   1193     if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) {
   1194       *badPtr = val;
   1195       return 0;
   1196     }
   1197     if (encodingName)
   1198       *encodingName = val;
   1199     if (encoding)
   1200       *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
   1201     if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
   1202       *badPtr = ptr;
   1203       return 0;
   1204     }
   1205     if (! name)
   1206       return 1;
   1207   }
   1208   if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
   1209       || isGeneralTextEntity) {
   1210     *badPtr = name;
   1211     return 0;
   1212   }
   1213   if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
   1214     if (standalone)
   1215       *standalone = 1;
   1216   } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
   1217     if (standalone)
   1218       *standalone = 0;
   1219   } else {
   1220     *badPtr = val;
   1221     return 0;
   1222   }
   1223   while (isSpace(toAscii(enc, ptr, end)))
   1224     ptr += enc->minBytesPerChar;
   1225   if (ptr != end) {
   1226     *badPtr = ptr;
   1227     return 0;
   1228   }
   1229   return 1;
   1230 }
   1231 
   1232 static int FASTCALL
   1233 checkCharRefNumber(int result) {
   1234   switch (result >> 8) {
   1235   case 0xD8:
   1236   case 0xD9:
   1237   case 0xDA:
   1238   case 0xDB:
   1239   case 0xDC:
   1240   case 0xDD:
   1241   case 0xDE:
   1242   case 0xDF:
   1243     return -1;
   1244   case 0:
   1245     if (latin1_encoding.type[result] == BT_NONXML)
   1246       return -1;
   1247     break;
   1248   case 0xFF:
   1249     if (result == 0xFFFE || result == 0xFFFF)
   1250       return -1;
   1251     break;
   1252   }
   1253   return result;
   1254 }
   1255 
   1256 int FASTCALL
   1257 XmlUtf8Encode(int c, char *buf) {
   1258   enum {
   1259     /* minN is minimum legal resulting value for N byte sequence */
   1260     min2 = 0x80,
   1261     min3 = 0x800,
   1262     min4 = 0x10000
   1263   };
   1264 
   1265   if (c < 0)
   1266     return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
   1267   if (c < min2) {
   1268     buf[0] = (char)(c | UTF8_cval1);
   1269     return 1;
   1270   }
   1271   if (c < min3) {
   1272     buf[0] = (char)((c >> 6) | UTF8_cval2);
   1273     buf[1] = (char)((c & 0x3f) | 0x80);
   1274     return 2;
   1275   }
   1276   if (c < min4) {
   1277     buf[0] = (char)((c >> 12) | UTF8_cval3);
   1278     buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
   1279     buf[2] = (char)((c & 0x3f) | 0x80);
   1280     return 3;
   1281   }
   1282   if (c < 0x110000) {
   1283     buf[0] = (char)((c >> 18) | UTF8_cval4);
   1284     buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
   1285     buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
   1286     buf[3] = (char)((c & 0x3f) | 0x80);
   1287     return 4;
   1288   }
   1289   return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
   1290 }
   1291 
   1292 int FASTCALL
   1293 XmlUtf16Encode(int charNum, unsigned short *buf) {
   1294   if (charNum < 0)
   1295     return 0;
   1296   if (charNum < 0x10000) {
   1297     buf[0] = (unsigned short)charNum;
   1298     return 1;
   1299   }
   1300   if (charNum < 0x110000) {
   1301     charNum -= 0x10000;
   1302     buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
   1303     buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
   1304     return 2;
   1305   }
   1306   return 0;
   1307 }
   1308 
/* Encoding object for a caller-described encoding, filled in by
   XmlInitUnknownEncoding(). */
struct unknown_encoding {
  /* Starts out as a copy of latin1_encoding; must stay the first member
     because an ENCODING pointer is cast back via AS_UNKNOWN_ENCODING. */
  struct normal_encoding normal;
  /* Caller-supplied converter for multi-byte sequences; called as
     convert(userData, p) and its result is used as a character number. */
  CONVERTER convert;
  /* Opaque pointer handed to convert. */
  void *userData;
  /* Per first byte: the UTF-16 code unit, or 0 meaning "ask convert". */
  unsigned short utf16[256];
  /* Per first byte: [0] is the UTF-8 length (0 meaning "ask convert"),
     followed by the UTF-8 bytes themselves. */
  char utf8[256][4];
};

/* View an ENCODING pointer as the unknown_encoding that contains it. */
#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
   1318 
   1319 int
   1320 XmlSizeOfUnknownEncoding(void) {
   1321   return sizeof(struct unknown_encoding);
   1322 }
   1323 
   1324 static int PTRFASTCALL
   1325 unknown_isName(const ENCODING *enc, const char *p) {
   1326   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
   1327   int c = uenc->convert(uenc->userData, p);
   1328   if (c & ~0xFFFF)
   1329     return 0;
   1330   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
   1331 }
   1332 
   1333 static int PTRFASTCALL
   1334 unknown_isNmstrt(const ENCODING *enc, const char *p) {
   1335   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
   1336   int c = uenc->convert(uenc->userData, p);
   1337   if (c & ~0xFFFF)
   1338     return 0;
   1339   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
   1340 }
   1341 
   1342 static int PTRFASTCALL
   1343 unknown_isInvalid(const ENCODING *enc, const char *p) {
   1344   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
   1345   int c = uenc->convert(uenc->userData, p);
   1346   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
   1347 }
   1348 
   1349 static enum XML_Convert_Result PTRCALL
   1350 unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
   1351                char **toP, const char *toLim) {
   1352   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
   1353   char buf[XML_UTF8_ENCODE_MAX];
   1354   for (;;) {
   1355     const char *utf8;
   1356     int n;
   1357     if (*fromP == fromLim)
   1358       return XML_CONVERT_COMPLETED;
   1359     utf8 = uenc->utf8[(unsigned char)**fromP];
   1360     n = *utf8++;
   1361     if (n == 0) {
   1362       int c = uenc->convert(uenc->userData, *fromP);
   1363       n = XmlUtf8Encode(c, buf);
   1364       if (n > toLim - *toP)
   1365         return XML_CONVERT_OUTPUT_EXHAUSTED;
   1366       utf8 = buf;
   1367       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
   1368                  - (BT_LEAD2 - 2));
   1369     } else {
   1370       if (n > toLim - *toP)
   1371         return XML_CONVERT_OUTPUT_EXHAUSTED;
   1372       (*fromP)++;
   1373     }
   1374     memcpy(*toP, utf8, n);
   1375     *toP += n;
   1376   }
   1377 }
   1378 
   1379 static enum XML_Convert_Result PTRCALL
   1380 unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
   1381                 unsigned short **toP, const unsigned short *toLim) {
   1382   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
   1383   while (*fromP < fromLim && *toP < toLim) {
   1384     unsigned short c = uenc->utf16[(unsigned char)**fromP];
   1385     if (c == 0) {
   1386       c = (unsigned short)uenc->convert(uenc->userData, *fromP);
   1387       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
   1388                  - (BT_LEAD2 - 2));
   1389     } else
   1390       (*fromP)++;
   1391     *(*toP)++ = c;
   1392   }
   1393 
   1394   if ((*toP == toLim) && (*fromP < fromLim))
   1395     return XML_CONVERT_OUTPUT_EXHAUSTED;
   1396   else
   1397     return XML_CONVERT_COMPLETED;
   1398 }
   1399 
   1400 ENCODING *
   1401 XmlInitUnknownEncoding(void *mem, const int *table, CONVERTER convert,
   1402                        void *userData) {
   1403   int i;
   1404   struct unknown_encoding *e = (struct unknown_encoding *)mem;
   1405   memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
   1406   for (i = 0; i < 128; i++)
   1407     if (latin1_encoding.type[i] != BT_OTHER
   1408         && latin1_encoding.type[i] != BT_NONXML && table[i] != i)
   1409       return 0;
   1410   for (i = 0; i < 256; i++) {
   1411     int c = table[i];
   1412     if (c == -1) {
   1413       e->normal.type[i] = BT_MALFORM;
   1414       /* This shouldn't really get used. */
   1415       e->utf16[i] = 0xFFFF;
   1416       e->utf8[i][0] = 1;
   1417       e->utf8[i][1] = 0;
   1418     } else if (c < 0) {
   1419       if (c < -4)
   1420         return 0;
   1421       /* Multi-byte sequences need a converter function */
   1422       if (! convert)
   1423         return 0;
   1424       e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
   1425       e->utf8[i][0] = 0;
   1426       e->utf16[i] = 0;
   1427     } else if (c < 0x80) {
   1428       if (latin1_encoding.type[c] != BT_OTHER
   1429           && latin1_encoding.type[c] != BT_NONXML && c != i)
   1430         return 0;
   1431       e->normal.type[i] = latin1_encoding.type[c];
   1432       e->utf8[i][0] = 1;
   1433       e->utf8[i][1] = (char)c;
   1434       e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
   1435     } else if (checkCharRefNumber(c) < 0) {
   1436       e->normal.type[i] = BT_NONXML;
   1437       /* This shouldn't really get used. */
   1438       e->utf16[i] = 0xFFFF;
   1439       e->utf8[i][0] = 1;
   1440       e->utf8[i][1] = 0;
   1441     } else {
   1442       if (c > 0xFFFF)
   1443         return 0;
   1444       if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
   1445         e->normal.type[i] = BT_NMSTRT;
   1446       else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
   1447         e->normal.type[i] = BT_NAME;
   1448       else
   1449         e->normal.type[i] = BT_OTHER;
   1450       e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
   1451       e->utf16[i] = (unsigned short)c;
   1452     }
   1453   }
   1454   e->userData = userData;
   1455   e->convert = convert;
   1456   if (convert) {
   1457     e->normal.isName2 = unknown_isName;
   1458     e->normal.isName3 = unknown_isName;
   1459     e->normal.isName4 = unknown_isName;
   1460     e->normal.isNmstrt2 = unknown_isNmstrt;
   1461     e->normal.isNmstrt3 = unknown_isNmstrt;
   1462     e->normal.isNmstrt4 = unknown_isNmstrt;
   1463     e->normal.isInvalid2 = unknown_isInvalid;
   1464     e->normal.isInvalid3 = unknown_isInvalid;
   1465     e->normal.isInvalid4 = unknown_isInvalid;
   1466   }
   1467   e->normal.enc.utf8Convert = unknown_toUtf8;
   1468   e->normal.enc.utf16Convert = unknown_toUtf16;
   1469   return &(e->normal.enc);
   1470 }
   1471 
/* Indices of the built-in encodings.  If this enumeration is changed,
   getEncodingIndex and encodings must also be changed.  The values are
   written out because 0 .. 5 double as indices into the encodingNames
   array of getEncodingIndex. */
enum {
  UNKNOWN_ENC = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC = 1,
  UTF_8_ENC = 2,
  UTF_16_ENC = 3,
  UTF_16BE_ENC = 4,
  UTF_16LE_ENC = 5,
  /* must match encodingNames up to here */
  NO_ENC = 6
};
   1485 
/* Upper-case names of the built-in encodings, spelled out with the ASCII_*
   constants and explicitly NUL-terminated.  getEncodingIndex() compares
   against these case-insensitively. */

/* "ISO-8859-1" */
static const char KW_ISO_8859_1[]
    = {ASCII_I, ASCII_S, ASCII_O,     ASCII_MINUS, ASCII_8, ASCII_8,
       ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1,     '\0'};
/* "US-ASCII" */
static const char KW_US_ASCII[]
    = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
       ASCII_C, ASCII_I, ASCII_I,     '\0'};
/* "UTF-8" */
static const char KW_UTF_8[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
/* "UTF-16" */
static const char KW_UTF_16[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
/* "UTF-16BE" */
static const char KW_UTF_16BE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_B, ASCII_E, '\0'};
/* "UTF-16LE" */
static const char KW_UTF_16LE[]
    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
       ASCII_6, ASCII_L, ASCII_E, '\0'};
   1502 
   1503 static int FASTCALL
   1504 getEncodingIndex(const char *name) {
   1505   static const char *const encodingNames[] = {
   1506       KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE,
   1507   };
   1508   int i;
   1509   if (name == NULL)
   1510     return NO_ENC;
   1511   for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++)
   1512     if (streqci(name, encodingNames[i]))
   1513       return i;
   1514   return UNKNOWN_ENC;
   1515 }
   1516 
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX reads that index back as an int.  SET_INIT_ENC_INDEX
   stores it, narrowed to char; its `i` argument is parenthesized so that
   the cast applies to the whole expression passed in, not just to its
   first operand.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
   1523 
   1524 /* This is what detects the encoding.  encodingTable maps from
   1525    encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
   1526    the external (protocol) specified encoding; state is
   1527    XML_CONTENT_STATE if we're parsing an external text entity, and
   1528    XML_PROLOG_STATE otherwise.
   1529 */
   1530 
   1531 static int
   1532 initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
   1533          int state, const char *ptr, const char *end, const char **nextTokPtr) {
   1534   const ENCODING **encPtr;
   1535 
   1536   if (ptr >= end)
   1537     return XML_TOK_NONE;
   1538   encPtr = enc->encPtr;
   1539   if (ptr + 1 == end) {
   1540     /* only a single byte available for auto-detection */
   1541 #ifndef XML_DTD /* FIXME */
   1542     /* a well-formed document entity must have more than one byte */
   1543     if (state != XML_CONTENT_STATE)
   1544       return XML_TOK_PARTIAL;
   1545 #endif
   1546     /* so we're parsing an external text entity... */
   1547     /* if UTF-16 was externally specified, then we need at least 2 bytes */
   1548     switch (INIT_ENC_INDEX(enc)) {
   1549     case UTF_16_ENC:
   1550     case UTF_16LE_ENC:
   1551     case UTF_16BE_ENC:
   1552       return XML_TOK_PARTIAL;
   1553     }
   1554     switch ((unsigned char)*ptr) {
   1555     case 0xFE:
   1556     case 0xFF:
   1557     case 0xEF: /* possibly first byte of UTF-8 BOM */
   1558       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
   1559         break;
   1560       /* fall through */
   1561     case 0x00:
   1562     case 0x3C:
   1563       return XML_TOK_PARTIAL;
   1564     }
   1565   } else {
   1566     switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
   1567     case 0xFEFF:
   1568       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
   1569         break;
   1570       *nextTokPtr = ptr + 2;
   1571       *encPtr = encodingTable[UTF_16BE_ENC];
   1572       return XML_TOK_BOM;
   1573     /* 00 3C is handled in the default case */
   1574     case 0x3C00:
   1575       if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
   1576            || INIT_ENC_INDEX(enc) == UTF_16_ENC)
   1577           && state == XML_CONTENT_STATE)
   1578         break;
   1579       *encPtr = encodingTable[UTF_16LE_ENC];
   1580       return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
   1581     case 0xFFFE:
   1582       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
   1583         break;
   1584       *nextTokPtr = ptr + 2;
   1585       *encPtr = encodingTable[UTF_16LE_ENC];
   1586       return XML_TOK_BOM;
   1587     case 0xEFBB:
   1588       /* Maybe a UTF-8 BOM (EF BB BF) */
   1589       /* If there's an explicitly specified (external) encoding
   1590          of ISO-8859-1 or some flavour of UTF-16
   1591          and this is an external text entity,
   1592          don't look for the BOM,
   1593          because it might be a legal data.
   1594       */
   1595       if (state == XML_CONTENT_STATE) {
   1596         int e = INIT_ENC_INDEX(enc);
   1597         if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
   1598             || e == UTF_16_ENC)
   1599           break;
   1600       }
   1601       if (ptr + 2 == end)
   1602         return XML_TOK_PARTIAL;
   1603       if ((unsigned char)ptr[2] == 0xBF) {
   1604         *nextTokPtr = ptr + 3;
   1605         *encPtr = encodingTable[UTF_8_ENC];
   1606         return XML_TOK_BOM;
   1607       }
   1608       break;
   1609     default:
   1610       if (ptr[0] == '\0') {
   1611         /* 0 isn't a legal data character. Furthermore a document
   1612            entity can only start with ASCII characters.  So the only
   1613            way this can fail to be big-endian UTF-16 if it it's an
   1614            external parsed general entity that's labelled as
   1615            UTF-16LE.
   1616         */
   1617         if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
   1618           break;
   1619         *encPtr = encodingTable[UTF_16BE_ENC];
   1620         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
   1621       } else if (ptr[1] == '\0') {
   1622         /* We could recover here in the case:
   1623             - parsing an external entity
   1624             - second byte is 0
   1625             - no externally specified encoding
   1626             - no encoding declaration
   1627            by assuming UTF-16LE.  But we don't, because this would mean when
   1628            presented just with a single byte, we couldn't reliably determine
   1629            whether we needed further bytes.
   1630         */
   1631         if (state == XML_CONTENT_STATE)
   1632           break;
   1633         *encPtr = encodingTable[UTF_16LE_ENC];
   1634         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
   1635       }
   1636       break;
   1637     }
   1638   }
   1639   *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
   1640   return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
   1641 }
   1642 
   1643 #define NS(x) x
   1644 #define ns(x) x
   1645 #define XML_TOK_NS_C
   1646 #include "xmltok_ns.c"
   1647 #undef XML_TOK_NS_C
   1648 #undef NS
   1649 #undef ns
   1650 
   1651 #ifdef XML_NS
   1652 
   1653 #  define NS(x) x##NS
   1654 #  define ns(x) x##_ns
   1655 
   1656 #  define XML_TOK_NS_C
   1657 #  include "xmltok_ns.c"
   1658 #  undef XML_TOK_NS_C
   1659 
   1660 #  undef NS
   1661 #  undef ns
   1662 
   1663 ENCODING *
   1664 XmlInitUnknownEncodingNS(void *mem, const int *table, CONVERTER convert,
   1665                          void *userData) {
   1666   ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
   1667   if (enc)
   1668     ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
   1669   return enc;
   1670 }
   1671 
   1672 #endif /* XML_NS */
   1673