Home | History | Annotate | Line # | Download | only in lib
      1  1.3      maya /*
      2  1.3      maya                             __  __            _
      3  1.3      maya                          ___\ \/ /_ __   __ _| |_
      4  1.3      maya                         / _ \\  /| '_ \ / _` | __|
      5  1.3      maya                        |  __//  \| |_) | (_| | |_
      6  1.3      maya                         \___/_/\_\ .__/ \__,_|\__|
      7  1.3      maya                                  |_| XML parser
      8  1.3      maya 
      9  1.3      maya    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
     10  1.4  christos    Copyright (c) 2000      Clark Cooper <coopercc (at) users.sourceforge.net>
     11  1.4  christos    Copyright (c) 2002      Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net>
     12  1.4  christos    Copyright (c) 2002-2005 Karl Waclawek <karl (at) waclawek.net>
     13  1.5       wiz    Copyright (c) 2016-2024 Sebastian Pipping <sebastian (at) pipping.org>
     14  1.4  christos    Copyright (c) 2017      Rhodri James <rhodri (at) wildebeest.org.uk>
     15  1.3      maya    Licensed under the MIT license:
     16  1.3      maya 
     17  1.3      maya    Permission is  hereby granted,  free of charge,  to any  person obtaining
     18  1.3      maya    a  copy  of  this  software   and  associated  documentation  files  (the
     19  1.3      maya    "Software"),  to  deal in  the  Software  without restriction,  including
     20  1.3      maya    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
     21  1.3      maya    distribute, sublicense, and/or sell copies of the Software, and to permit
     22  1.3      maya    persons  to whom  the Software  is  furnished to  do so,  subject to  the
     23  1.3      maya    following conditions:
     24  1.3      maya 
     25  1.3      maya    The above copyright  notice and this permission notice  shall be included
     26  1.3      maya    in all copies or substantial portions of the Software.
     27  1.3      maya 
     28  1.3      maya    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
     29  1.3      maya    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
     30  1.3      maya    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
     31  1.3      maya    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
     32  1.3      maya    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
     33  1.3      maya    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     34  1.3      maya    USE OR OTHER DEALINGS IN THE SOFTWARE.
     35  1.1      tron */
     36  1.1      tron 
     37  1.1      tron #ifndef XmlTok_INCLUDED
     38  1.1      tron #define XmlTok_INCLUDED 1
     39  1.1      tron 
     40  1.1      tron #ifdef __cplusplus
     41  1.1      tron extern "C" {
     42  1.1      tron #endif
     43  1.1      tron 
      44  1.1      tron /* The following token may be returned by XmlContentTok */
      45  1.3      maya #define XML_TOK_TRAILING_RSQB                                                  \
      46  1.3      maya   -5 /* ] or ]] at the end of the scan; might be                               \
      47  1.3      maya         start of illegal ]]> sequence */
      48  1.1      tron /* The following tokens may be returned by both XmlPrologTok and
      49  1.1      tron    XmlContentTok.
                           All of these codes are <= 0, unlike the token types defined after
                           them in this header, which are all positive.
      50  1.1      tron */
      51  1.3      maya #define XML_TOK_NONE -4 /* The string to be scanned is empty */
      52  1.3      maya #define XML_TOK_TRAILING_CR                                                    \
      53  1.3      maya   -3                            /* A CR at the end of the scan;                \
      54  1.3      maya                                    might be part of CRLF sequence */
      55  1.3      maya #define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
      56  1.3      maya #define XML_TOK_PARTIAL -1      /* only part of a token */
      57  1.1      tron #define XML_TOK_INVALID 0 /* the string does not start a valid token */
     58  1.1      tron 
      59  1.1      tron /* The following tokens are returned by XmlContentTok; some are also
      60  1.1      tron    returned by XmlAttributeValueTok, XmlEntityValueTok, XmlCdataSectionTok.
                           The values in this group run consecutively from 1 to 10.
      61  1.1      tron */
      62  1.1      tron #define XML_TOK_START_TAG_WITH_ATTS 1
      63  1.1      tron #define XML_TOK_START_TAG_NO_ATTS 2
      64  1.1      tron #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
      65  1.1      tron #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
      66  1.1      tron #define XML_TOK_END_TAG 5
      67  1.1      tron #define XML_TOK_DATA_CHARS 6
      68  1.1      tron #define XML_TOK_DATA_NEWLINE 7
      69  1.1      tron #define XML_TOK_CDATA_SECT_OPEN 8
      70  1.1      tron #define XML_TOK_ENTITY_REF 9
      71  1.3      maya #define XML_TOK_CHAR_REF 10 /* numeric character reference */
     72  1.1      tron 
      73  1.1      tron /* The following tokens may be returned by both XmlPrologTok and
      74  1.1      tron    XmlContentTok.
                           These are the positive token types shared by the two scanners
                           (values 11 to 14); the shared codes that are <= 0 are defined
                           earlier in this header.
      75  1.1      tron */
      76  1.3      maya #define XML_TOK_PI 11       /* processing instruction */
      77  1.3      maya #define XML_TOK_XML_DECL 12 /* XML decl or text decl */
      78  1.1      tron #define XML_TOK_COMMENT 13
      79  1.3      maya #define XML_TOK_BOM 14 /* Byte order mark */
     80  1.1      tron 
      81  1.1      tron /* The following tokens are returned only by XmlPrologTok
                           (values 15 to 29) */
      82  1.1      tron #define XML_TOK_PROLOG_S 15
      83  1.3      maya #define XML_TOK_DECL_OPEN 16  /* <!foo */
      84  1.3      maya #define XML_TOK_DECL_CLOSE 17 /* > */
      85  1.1      tron #define XML_TOK_NAME 18
      86  1.1      tron #define XML_TOK_NMTOKEN 19
      87  1.3      maya #define XML_TOK_POUND_NAME 20 /* #name */
      88  1.3      maya #define XML_TOK_OR 21         /* | */
      89  1.1      tron #define XML_TOK_PERCENT 22 /* % */
      90  1.1      tron #define XML_TOK_OPEN_PAREN 23 /* ( */
      91  1.1      tron #define XML_TOK_CLOSE_PAREN 24 /* ) */
      92  1.1      tron #define XML_TOK_OPEN_BRACKET 25
      93  1.1      tron #define XML_TOK_CLOSE_BRACKET 26
      94  1.1      tron #define XML_TOK_LITERAL 27
      95  1.1      tron #define XML_TOK_PARAM_ENTITY_REF 28
      96  1.1      tron #define XML_TOK_INSTANCE_START 29
     97  1.1      tron 
      98  1.1      tron /* The following occur only in element type declarations.
                           NOTE(review): XML_TOK_COND_SECT_OPEN and XML_TOK_COND_SECT_CLOSE are
                           annotated as <![ and ]]>, which does not look specific to element
                           type declarations -- confirm whether they belong under this heading. */
      99  1.3      maya #define XML_TOK_NAME_QUESTION 30        /* name? */
     100  1.3      maya #define XML_TOK_NAME_ASTERISK 31        /* name* */
     101  1.3      maya #define XML_TOK_NAME_PLUS 32            /* name+ */
     102  1.3      maya #define XML_TOK_COND_SECT_OPEN 33       /* <![ */
     103  1.3      maya #define XML_TOK_COND_SECT_CLOSE 34      /* ]]> */
     104  1.3      maya #define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
     105  1.3      maya #define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
     106  1.3      maya #define XML_TOK_CLOSE_PAREN_PLUS 37     /* )+ */
     107  1.1      tron #define XML_TOK_COMMA 38 /* , */
    108  1.1      tron 
     109  1.1      tron /* The following token is returned only by XmlAttributeValueTok */
     110  1.1      tron #define XML_TOK_ATTRIBUTE_VALUE_S 39 /* presumably whitespace inside an attribute value -- TODO confirm */
     111  1.1      tron 
     112  1.1      tron /* The following token is returned only by XmlCdataSectionTok */
     113  1.1      tron #define XML_TOK_CDATA_SECT_CLOSE 40 /* counterpart of XML_TOK_CDATA_SECT_OPEN (8) */
     114  1.1      tron 
     115  1.1      tron /* With namespace processing this is returned by XmlPrologTok for a
     116  1.1      tron    name with a colon.
     117  1.1      tron */
     118  1.1      tron #define XML_TOK_PREFIXED_NAME 41
    119  1.1      tron 
     120  1.1      tron #ifdef XML_DTD
                        /* Token type that is defined only when XML_DTD is defined. */
     121  1.3      maya #  define XML_TOK_IGNORE_SECT 42
     122  1.1      tron #endif /* XML_DTD */
     123  1.1      tron 
                        /* XML_N_STATES is the length of the scanners array in struct encoding;
                           defining XML_DTD adds a fourth slot, XML_IGNORE_SECTION_STATE. */
     124  1.1      tron #ifdef XML_DTD
     125  1.3      maya #  define XML_N_STATES 4
     126  1.1      tron #else /* not XML_DTD */
     127  1.3      maya #  define XML_N_STATES 3
     128  1.1      tron #endif /* not XML_DTD */
     129  1.1      tron 
                        /* Indices into the scanners array; each is passed as the state argument
                           of XmlTok by the matching Xml...Tok wrapper macro. */
     130  1.1      tron #define XML_PROLOG_STATE 0
     131  1.1      tron #define XML_CONTENT_STATE 1
     132  1.1      tron #define XML_CDATA_SECTION_STATE 2
     133  1.1      tron #ifdef XML_DTD
     134  1.3      maya #  define XML_IGNORE_SECTION_STATE 3
     135  1.1      tron #endif /* XML_DTD */
    136  1.1      tron 
     137  1.1      tron #define XML_N_LITERAL_TYPES 2 /* length of the literalScanners array in struct encoding */
     138  1.1      tron #define XML_ATTRIBUTE_VALUE_LITERAL 0 /* literalScanners index used by XmlAttributeValueTok */
     139  1.1      tron #define XML_ENTITY_VALUE_LITERAL 1 /* literalScanners index used by XmlEntityValueTok */
     140  1.1      tron 
     141  1.1      tron /* The size of the buffer passed to XmlUtf8Encode must be at least this.
                           XmlUtf8Encode receives the buffer as char *buf. */
     142  1.1      tron #define XML_UTF8_ENCODE_MAX 4
     143  1.1      tron /* The size of the buffer passed to XmlUtf16Encode must be at least this.
                           XmlUtf16Encode receives the buffer as unsigned short *buf. */
     144  1.1      tron #define XML_UTF16_ENCODE_MAX 2
    145  1.1      tron 
     146  1.1      tron typedef struct position {
     147  1.1      tron   /* first line and first column are 0 not 1 */
                          /* a POSITION * is the last argument of the updatePosition hook
                             (reached through XmlUpdatePosition) */
     148  1.1      tron   XML_Size lineNumber;
     149  1.1      tron   XML_Size columnNumber;
     150  1.1      tron } POSITION;
    151  1.1      tron 
     152  1.1      tron typedef struct { /* element type of the atts array given to the getAtts hook (XmlGetAttributes) */
     153  1.1      tron   const char *name;
     154  1.1      tron   const char *valuePtr; /* presumably the start of the value text -- TODO confirm */
     155  1.1      tron   const char *valueEnd; /* presumably the end of the value text -- TODO confirm */
     156  1.1      tron   char normalized; /* NOTE(review): meaning is not visible in this header -- confirm */
     157  1.1      tron } ATTRIBUTE;
    158  1.1      tron 
     158  1.1      tron struct encoding; /* forward declaration; the full definition follows in this header */
     159  1.1      tron typedef struct encoding ENCODING;
     160  1.1      tron 
                        /* Function-pointer type of the entries in the scanners and
                           literalScanners arrays of struct encoding.  XmlTok and XmlLiteralTok
                           call it as (enc, ptr, end, nextTokPtr) and yield its int result. */
     161  1.3      maya typedef int(PTRCALL *SCANNER)(const ENCODING *, const char *, const char *,
     162  1.3      maya                               const char **);
    164  1.1      tron 
     164  1.2       spz enum XML_Convert_Result { /* return type of the utf8Convert and utf16Convert hooks */
     165  1.2       spz   XML_CONVERT_COMPLETED = 0,
     166  1.2       spz   XML_CONVERT_INPUT_INCOMPLETE = 1, /* presumably: input ended inside a character -- TODO confirm */
     167  1.3      maya   XML_CONVERT_OUTPUT_EXHAUSTED
     168  1.3      maya   = 2 /* and therefore potentially input remaining as well */
     169  1.2       spz };
    171  1.2       spz 
     171  1.1      tron struct encoding { /* table of hooks; the Xml* macros in this header call through it */
     172  1.1      tron   SCANNER scanners[XML_N_STATES]; /* indexed by XML_*_STATE; called by XmlTok */
     173  1.1      tron   SCANNER literalScanners[XML_N_LITERAL_TYPES]; /* indexed by XML_*_LITERAL; called by XmlLiteralTok */
     174  1.3      maya   int(PTRCALL *nameMatchesAscii)(const ENCODING *, const char *, const char *,
     175  1.3      maya                                  const char *); /* called by XmlNameMatchesAscii */
     176  1.3      maya   int(PTRFASTCALL *nameLength)(const ENCODING *, const char *); /* called by XmlNameLength */
     177  1.1      tron   const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); /* called by XmlSkipS */
     178  1.3      maya   int(PTRCALL *getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
     179  1.3      maya                         ATTRIBUTE *atts); /* called by XmlGetAttributes */
     180  1.3      maya   int(PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); /* called by XmlCharRefNumber */
     181  1.3      maya   int(PTRCALL *predefinedEntityName)(const ENCODING *, const char *,
     182  1.3      maya                                      const char *); /* called by XmlPredefinedEntityName */
     183  1.3      maya   void(PTRCALL *updatePosition)(const ENCODING *, const char *ptr,
     184  1.3      maya                                 const char *end, POSITION *); /* called by XmlUpdatePosition */
     185  1.3      maya   int(PTRCALL *isPublicId)(const ENCODING *enc, const char *ptr,
     186  1.3      maya                            const char *end, const char **badPtr); /* called by XmlIsPublicId */
     187  1.3      maya   enum XML_Convert_Result(PTRCALL *utf8Convert)(const ENCODING *enc,
     188  1.3      maya                                                 const char **fromP,
     189  1.3      maya                                                 const char *fromLim, char **toP,
     190  1.3      maya                                                 const char *toLim); /* called by XmlUtf8Convert */
     191  1.3      maya   enum XML_Convert_Result(PTRCALL *utf16Convert)(const ENCODING *enc,
     192  1.3      maya                                                  const char **fromP,
     193  1.3      maya                                                  const char *fromLim,
     194  1.3      maya                                                  unsigned short **toP,
     195  1.3      maya                                                  const unsigned short *toLim); /* called by XmlUtf16Convert */
                          /* Plain data members; this header defines no accessor macro for them. */
     196  1.1      tron   int minBytesPerChar;
     197  1.1      tron   char isUtf8;
     198  1.1      tron   char isUtf16;
     199  1.1      tron };
    201  1.1      tron 
     201  1.1      tron /* Scan the string starting at ptr until the end of the next complete
     202  1.1      tron    token, but do not scan past end.  Return an integer giving the
     203  1.1      tron    type of token.
     204  1.1      tron 
     205  1.1      tron    Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.
     206  1.1      tron 
     207  1.1      tron    Return XML_TOK_PARTIAL when the string does not contain a complete
     208  1.1      tron    token; nextTokPtr will not be set.
     209  1.1      tron 
     210  1.1      tron    Return XML_TOK_INVALID when the string does not start a valid
     211  1.1      tron    token; nextTokPtr will be set to point to the character which made
     212  1.1      tron    the token invalid.
     213  1.1      tron 
     214  1.1      tron    Otherwise the string starts with a valid token; nextTokPtr will be
     215  1.1      tron    set to point to the character following the end of that token.
     216  1.1      tron 
     217  1.1      tron    Each data character counts as a single token, but adjacent data
     218  1.1      tron    characters may be returned together.  Similarly for characters in
     219  1.1      tron    the prolog outside literals, comments and processing instructions.

                           XmlTok expands enc twice (once to select the scanner and once as the
                           scanner's first argument), so enc should be free of side effects.
                           XmlPrologTok, XmlContentTok, XmlCdataSectionTok and
                           XmlIgnoreSectionTok only fix the state argument of XmlTok.
     220  1.1      tron */
     221  1.1      tron 
     222  1.3      maya #define XmlTok(enc, state, ptr, end, nextTokPtr)                               \
     223  1.1      tron   (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
     224  1.1      tron 
     225  1.3      maya #define XmlPrologTok(enc, ptr, end, nextTokPtr)                                \
     226  1.3      maya   XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
     227  1.1      tron 
     228  1.3      maya #define XmlContentTok(enc, ptr, end, nextTokPtr)                               \
     229  1.3      maya   XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
     230  1.1      tron 
     231  1.3      maya #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr)                          \
     232  1.3      maya   XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
     233  1.1      tron 
     234  1.1      tron #ifdef XML_DTD
     235  1.1      tron 
                        /* Defined only with XML_DTD, like XML_IGNORE_SECTION_STATE that it uses. */
     236  1.3      maya #  define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr)                       \
     237  1.3      maya     XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
     238  1.1      tron 
     239  1.1      tron #endif /* XML_DTD */
    241  1.1      tron 
     241  1.1      tron /* This is used for performing a 2nd-level tokenization on the content
     242  1.1      tron    of a literal that has already been returned by XmlTok.
                           literalType indexes the literalScanners array of struct encoding.
                           enc is expanded twice, so it should be free of side effects.
                           XmlAttributeValueTok and XmlEntityValueTok only fix literalType.
     243  1.1      tron */
     244  1.3      maya #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr)                  \
     245  1.1      tron   (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
     246  1.1      tron 
     247  1.3      maya #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr)                        \
     248  1.3      maya   XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
     249  1.1      tron 
     250  1.3      maya #define XmlEntityValueTok(enc, ptr, end, nextTokPtr)                           \
     251  1.3      maya   XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
    253  1.1      tron 
     253  1.3      maya #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2)                             \
     254  1.1      tron   (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))
     255  1.1      tron /* Every macro from XmlNameMatchesAscii through XmlUtf16Convert calls one
                           function pointer of struct encoding, passing enc as the first
                           argument and the remaining arguments unchanged.  The member has the
                           macro's name without the Xml prefix, except XmlGetAttributes, which
                           calls getAtts.  enc is expanded twice in each macro, so it should be
                           free of side effects. */
     256  1.3      maya #define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr))
     257  1.1      tron 
     258  1.3      maya #define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr))
     259  1.1      tron 
     260  1.3      maya #define XmlGetAttributes(enc, ptr, attsMax, atts)                              \
     261  1.1      tron   (((enc)->getAtts)(enc, ptr, attsMax, atts))
     262  1.1      tron 
     263  1.3      maya #define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr))
     264  1.1      tron 
     265  1.3      maya #define XmlPredefinedEntityName(enc, ptr, end)                                 \
     266  1.1      tron   (((enc)->predefinedEntityName)(enc, ptr, end))
     267  1.1      tron 
     268  1.3      maya #define XmlUpdatePosition(enc, ptr, end, pos)                                  \
     269  1.1      tron   (((enc)->updatePosition)(enc, ptr, end, pos))
     270  1.1      tron 
     271  1.3      maya #define XmlIsPublicId(enc, ptr, end, badPtr)                                   \
     272  1.1      tron   (((enc)->isPublicId)(enc, ptr, end, badPtr))
     273  1.1      tron 
     274  1.3      maya #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim)                        \
     275  1.1      tron   (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
     276  1.1      tron 
     277  1.3      maya #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim)                       \
     278  1.1      tron   (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
    280  1.1      tron 
     281  1.1      tron typedef struct { /* type of the first argument of XmlInitEncoding and XmlInitEncodingNS */
     282  1.1      tron   ENCODING initEnc; /* embedded by value, not by pointer */
     283  1.1      tron   const ENCODING **encPtr; /* NOTE(review): presumably the encPtr handed to XmlInitEncoding -- confirm */
     284  1.1      tron } INIT_ENCODING;
    285  1.1      tron 
     286  1.3      maya int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc,
     287  1.3      maya                     const char *ptr, const char *end, const char **badPtr,
     288  1.3      maya                     const char **versionPtr, const char **versionEndPtr,
     289  1.1      tron                     const char **encodingNamePtr,
     290  1.3      maya                     const ENCODING **namedEncodingPtr, int *standalonePtr);
     291  1.1      tron /* NOTE(review): the pointer-to-pointer and int * parameters of
                           XmlParseXmlDecl look like outputs; the meaning of the int results of
                           XmlParseXmlDecl and XmlInitEncoding is not visible in this header --
                           confirm against the definitions. */
     292  1.5       wiz int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr,
     293  1.5       wiz                     const char *name);
     294  1.1      tron const ENCODING *XmlGetUtf8InternalEncoding(void);
     295  1.1      tron const ENCODING *XmlGetUtf16InternalEncoding(void);
     296  1.1      tron int FASTCALL XmlUtf8Encode(int charNumber, char *buf); /* size of buf must be at least XML_UTF8_ENCODE_MAX */
     297  1.1      tron int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); /* size of buf must be at least XML_UTF16_ENCODE_MAX */
     298  1.1      tron int XmlSizeOfUnknownEncoding(void); /* presumably the size needed for the mem argument of XmlInitUnknownEncoding -- TODO confirm */
    299  1.1      tron 
     300  1.3      maya typedef int(XMLCALL *CONVERTER)(void *userData, const char *p);
     301  1.1      tron /* CONVERTER is the callback type taken by XmlInitUnknownEncoding and
                           XmlInitUnknownEncodingNS.  It is declared with XMLCALL, whereas the
                           hooks in struct encoding use PTRCALL or PTRFASTCALL.
                           NOTE(review): the required size of mem and the layout of table are
                           not visible in this header -- confirm against the definition. */
     302  1.3      maya ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
     303  1.3      maya                                  void *userData);
    304  1.1      tron 
     305  1.3      maya int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc,
     306  1.3      maya                       const char *ptr, const char *end, const char **badPtr,
     307  1.3      maya                       const char **versionPtr, const char **versionEndPtr,
     308  1.1      tron                       const char **encodingNamePtr,
     309  1.3      maya                       const ENCODING **namedEncodingPtr, int *standalonePtr);
     310  1.1      tron /* Each ...NS declaration in this group has the same return type and
                           parameter list as the declaration of the same name without the NS
                           suffix.  NOTE(review): presumably these are the variants used with
                           namespace processing -- confirm against the definitions. */
     311  1.5       wiz int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr,
     312  1.5       wiz                       const char *name);
     313  1.1      tron const ENCODING *XmlGetUtf8InternalEncodingNS(void);
     314  1.1      tron const ENCODING *XmlGetUtf16InternalEncodingNS(void);
     315  1.3      maya ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
     316  1.3      maya                                    void *userData);
    317  1.1      tron #ifdef __cplusplus
    318  1.1      tron }
    319  1.1      tron #endif
    320  1.1      tron 
    321  1.1      tron #endif /* not XmlTok_INCLUDED */
    322