1 1.3 maya /* 2 1.3 maya __ __ _ 3 1.3 maya ___\ \/ /_ __ __ _| |_ 4 1.3 maya / _ \\ /| '_ \ / _` | __| 5 1.3 maya | __// \| |_) | (_| | |_ 6 1.3 maya \___/_/\_\ .__/ \__,_|\__| 7 1.3 maya |_| XML parser 8 1.3 maya 9 1.3 maya Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 1.4 christos Copyright (c) 2000 Clark Cooper <coopercc (at) users.sourceforge.net> 11 1.4 christos Copyright (c) 2002 Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net> 12 1.4 christos Copyright (c) 2002-2005 Karl Waclawek <karl (at) waclawek.net> 13 1.5 wiz Copyright (c) 2016-2024 Sebastian Pipping <sebastian (at) pipping.org> 14 1.4 christos Copyright (c) 2017 Rhodri James <rhodri (at) wildebeest.org.uk> 15 1.3 maya Licensed under the MIT license: 16 1.3 maya 17 1.3 maya Permission is hereby granted, free of charge, to any person obtaining 18 1.3 maya a copy of this software and associated documentation files (the 19 1.3 maya "Software"), to deal in the Software without restriction, including 20 1.3 maya without limitation the rights to use, copy, modify, merge, publish, 21 1.3 maya distribute, sublicense, and/or sell copies of the Software, and to permit 22 1.3 maya persons to whom the Software is furnished to do so, subject to the 23 1.3 maya following conditions: 24 1.3 maya 25 1.3 maya The above copyright notice and this permission notice shall be included 26 1.3 maya in all copies or substantial portions of the Software. 27 1.3 maya 28 1.3 maya THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 1.3 maya EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 1.3 maya MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 31 1.3 maya NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 32 1.3 maya DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 33 1.3 maya OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 34 1.3 maya USE OR OTHER DEALINGS IN THE SOFTWARE. 35 1.1 tron */ 36 1.1 tron 37 1.1 tron #ifndef XmlTok_INCLUDED 38 1.1 tron #define XmlTok_INCLUDED 1 39 1.1 tron 40 1.1 tron #ifdef __cplusplus 41 1.1 tron extern "C" { 42 1.1 tron #endif 43 1.1 tron 44 1.1 tron /* The following token may be returned by XmlContentTok */ 45 1.3 maya #define XML_TOK_TRAILING_RSQB \ 46 1.3 maya -5 /* ] or ]] at the end of the scan; might be \ 47 1.3 maya start of illegal ]]> sequence */ 48 1.1 tron /* The following tokens may be returned by both XmlPrologTok and 49 1.1 tron XmlContentTok. 50 1.1 tron */ 51 1.3 maya #define XML_TOK_NONE -4 /* The string to be scanned is empty */ 52 1.3 maya #define XML_TOK_TRAILING_CR \ 53 1.3 maya -3 /* A CR at the end of the scan; \ 54 1.3 maya might be part of CRLF sequence */ 55 1.3 maya #define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ 56 1.3 maya #define XML_TOK_PARTIAL -1 /* only part of a token */ 57 1.1 tron #define XML_TOK_INVALID 0 58 1.1 tron 59 1.1 tron /* The following tokens are returned by XmlContentTok; some are also 60 1.1 tron returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. 61 1.1 tron */ 62 1.1 tron #define XML_TOK_START_TAG_WITH_ATTS 1 63 1.1 tron #define XML_TOK_START_TAG_NO_ATTS 2 64 1.1 tron #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ 65 1.1 tron #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 66 1.1 tron #define XML_TOK_END_TAG 5 67 1.1 tron #define XML_TOK_DATA_CHARS 6 68 1.1 tron #define XML_TOK_DATA_NEWLINE 7 69 1.1 tron #define XML_TOK_CDATA_SECT_OPEN 8 70 1.1 tron #define XML_TOK_ENTITY_REF 9 71 1.3 maya #define XML_TOK_CHAR_REF 10 /* numeric character reference */ 72 1.1 tron 73 1.1 tron /* The following tokens may be returned by both XmlPrologTok and 74 1.1 tron XmlContentTok. 75 1.1 tron */ 76 1.3 maya #define XML_TOK_PI 11 /* processing instruction */ 77 1.3 maya #define XML_TOK_XML_DECL 12 /* XML decl or text decl */ 78 1.1 tron #define XML_TOK_COMMENT 13 79 1.3 maya #define XML_TOK_BOM 14 /* Byte order mark */ 80 1.1 tron 81 1.1 tron /* The following tokens are returned only by XmlPrologTok */ 82 1.1 tron #define XML_TOK_PROLOG_S 15 83 1.3 maya #define XML_TOK_DECL_OPEN 16 /* <!foo */ 84 1.3 maya #define XML_TOK_DECL_CLOSE 17 /* > */ 85 1.1 tron #define XML_TOK_NAME 18 86 1.1 tron #define XML_TOK_NMTOKEN 19 87 1.3 maya #define XML_TOK_POUND_NAME 20 /* #name */ 88 1.3 maya #define XML_TOK_OR 21 /* | */ 89 1.1 tron #define XML_TOK_PERCENT 22 90 1.1 tron #define XML_TOK_OPEN_PAREN 23 91 1.1 tron #define XML_TOK_CLOSE_PAREN 24 92 1.1 tron #define XML_TOK_OPEN_BRACKET 25 93 1.1 tron #define XML_TOK_CLOSE_BRACKET 26 94 1.1 tron #define XML_TOK_LITERAL 27 95 1.1 tron #define XML_TOK_PARAM_ENTITY_REF 28 96 1.1 tron #define XML_TOK_INSTANCE_START 29 97 1.1 tron 98 1.1 tron /* The following occur only in element type declarations */ 99 1.3 maya #define XML_TOK_NAME_QUESTION 30 /* name? */ 100 1.3 maya #define XML_TOK_NAME_ASTERISK 31 /* name* */ 101 1.3 maya #define XML_TOK_NAME_PLUS 32 /* name+ */ 102 1.3 maya #define XML_TOK_COND_SECT_OPEN 33 /* <![ */ 103 1.3 maya #define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */ 104 1.3 maya #define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ 105 1.3 maya #define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ 106 1.3 maya #define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ 107 1.1 tron #define XML_TOK_COMMA 38 108 1.1 tron 109 1.1 tron /* The following token is returned only by XmlAttributeValueTok */ 110 1.1 tron #define XML_TOK_ATTRIBUTE_VALUE_S 39 111 1.1 tron 112 1.1 tron /* The following token is returned only by XmlCdataSectionTok */ 113 1.1 tron #define XML_TOK_CDATA_SECT_CLOSE 40 114 1.1 tron 115 1.1 tron /* With namespace processing this is returned by XmlPrologTok for a 116 1.1 tron name with a colon. 117 1.1 tron */ 118 1.1 tron #define XML_TOK_PREFIXED_NAME 41 119 1.1 tron 120 1.1 tron #ifdef XML_DTD 121 1.3 maya # define XML_TOK_IGNORE_SECT 42 122 1.1 tron #endif /* XML_DTD */ 123 1.1 tron 124 1.1 tron #ifdef XML_DTD 125 1.3 maya # define XML_N_STATES 4 126 1.1 tron #else /* not XML_DTD */ 127 1.3 maya # define XML_N_STATES 3 128 1.1 tron #endif /* not XML_DTD */ 129 1.1 tron 130 1.1 tron #define XML_PROLOG_STATE 0 131 1.1 tron #define XML_CONTENT_STATE 1 132 1.1 tron #define XML_CDATA_SECTION_STATE 2 133 1.1 tron #ifdef XML_DTD 134 1.3 maya # define XML_IGNORE_SECTION_STATE 3 135 1.1 tron #endif /* XML_DTD */ 136 1.1 tron 137 1.1 tron #define XML_N_LITERAL_TYPES 2 138 1.1 tron #define XML_ATTRIBUTE_VALUE_LITERAL 0 139 1.1 tron #define XML_ENTITY_VALUE_LITERAL 1 140 1.1 tron 141 1.1 tron /* The size of the buffer passed to XmlUtf8Encode must be at least this. */ 142 1.1 tron #define XML_UTF8_ENCODE_MAX 4 143 1.1 tron /* The size of the buffer passed to XmlUtf16Encode must be at least this. */ 144 1.1 tron #define XML_UTF16_ENCODE_MAX 2 145 1.1 tron 146 1.1 tron typedef struct position { 147 1.1 tron /* first line and first column are 0 not 1 */ 148 1.1 tron XML_Size lineNumber; 149 1.1 tron XML_Size columnNumber; 150 1.1 tron } POSITION; 151 1.1 tron 152 1.1 tron typedef struct { 153 1.1 tron const char *name; 154 1.1 tron const char *valuePtr; 155 1.1 tron const char *valueEnd; 156 1.1 tron char normalized; 157 1.1 tron } ATTRIBUTE; 158 1.1 tron 159 1.1 tron struct encoding; 160 1.1 tron typedef struct encoding ENCODING; 161 1.1 tron 162 1.3 maya typedef int(PTRCALL *SCANNER)(const ENCODING *, const char *, const char *, 163 1.3 maya const char **); 164 1.1 tron 165 1.2 spz enum XML_Convert_Result { 166 1.2 spz XML_CONVERT_COMPLETED = 0, 167 1.2 spz XML_CONVERT_INPUT_INCOMPLETE = 1, 168 1.3 maya XML_CONVERT_OUTPUT_EXHAUSTED 169 1.3 maya = 2 /* and therefore potentially input remaining as well */ 170 1.2 spz }; 171 1.2 spz 172 1.1 tron struct encoding { 173 1.1 tron SCANNER scanners[XML_N_STATES]; 174 1.1 tron SCANNER literalScanners[XML_N_LITERAL_TYPES]; 175 1.3 maya int(PTRCALL *nameMatchesAscii)(const ENCODING *, const char *, const char *, 176 1.3 maya const char *); 177 1.3 maya int(PTRFASTCALL *nameLength)(const ENCODING *, const char *); 178 1.1 tron const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); 179 1.3 maya int(PTRCALL *getAtts)(const ENCODING *enc, const char *ptr, int attsMax, 180 1.3 maya ATTRIBUTE *atts); 181 1.3 maya int(PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); 182 1.3 maya int(PTRCALL *predefinedEntityName)(const ENCODING *, const char *, 183 1.3 maya const char *); 184 1.3 maya void(PTRCALL *updatePosition)(const ENCODING *, const char *ptr, 185 1.3 maya const char *end, POSITION *); 186 1.3 maya int(PTRCALL *isPublicId)(const ENCODING *enc, const char *ptr, 187 1.3 maya const char *end, const char **badPtr); 188 1.3 maya enum XML_Convert_Result(PTRCALL *utf8Convert)(const ENCODING *enc, 189 1.3 maya const char **fromP, 190 1.3 maya const char *fromLim, char **toP, 191 1.3 maya const char *toLim); 192 1.3 maya enum XML_Convert_Result(PTRCALL *utf16Convert)(const ENCODING *enc, 193 1.3 maya const char **fromP, 194 1.3 maya const char *fromLim, 195 1.3 maya unsigned short **toP, 196 1.3 maya const unsigned short *toLim); 197 1.1 tron int minBytesPerChar; 198 1.1 tron char isUtf8; 199 1.1 tron char isUtf16; 200 1.1 tron }; 201 1.1 tron 202 1.1 tron /* Scan the string starting at ptr until the end of the next complete 203 1.1 tron token, but do not scan past eptr. Return an integer giving the 204 1.1 tron type of token. 205 1.1 tron 206 1.1 tron Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. 207 1.1 tron 208 1.1 tron Return XML_TOK_PARTIAL when the string does not contain a complete 209 1.1 tron token; nextTokPtr will not be set. 210 1.1 tron 211 1.1 tron Return XML_TOK_INVALID when the string does not start a valid 212 1.1 tron token; nextTokPtr will be set to point to the character which made 213 1.1 tron the token invalid. 214 1.1 tron 215 1.1 tron Otherwise the string starts with a valid token; nextTokPtr will be 216 1.1 tron set to point to the character following the end of that token. 217 1.1 tron 218 1.1 tron Each data character counts as a single token, but adjacent data 219 1.1 tron characters may be returned together. Similarly for characters in 220 1.1 tron the prolog outside literals, comments and processing instructions. 221 1.1 tron */ 222 1.1 tron 223 1.3 maya #define XmlTok(enc, state, ptr, end, nextTokPtr) \ 224 1.1 tron (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) 225 1.1 tron 226 1.3 maya #define XmlPrologTok(enc, ptr, end, nextTokPtr) \ 227 1.3 maya XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) 228 1.1 tron 229 1.3 maya #define XmlContentTok(enc, ptr, end, nextTokPtr) \ 230 1.3 maya XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) 231 1.1 tron 232 1.3 maya #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ 233 1.3 maya XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) 234 1.1 tron 235 1.1 tron #ifdef XML_DTD 236 1.1 tron 237 1.3 maya # define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ 238 1.3 maya XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) 239 1.1 tron 240 1.1 tron #endif /* XML_DTD */ 241 1.1 tron 242 1.1 tron /* This is used for performing a 2nd-level tokenization on the content 243 1.1 tron of a literal that has already been returned by XmlTok. 244 1.1 tron */ 245 1.3 maya #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ 246 1.1 tron (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) 247 1.1 tron 248 1.3 maya #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ 249 1.3 maya XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) 250 1.1 tron 251 1.3 maya #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ 252 1.3 maya XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) 253 1.1 tron 254 1.3 maya #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ 255 1.1 tron (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) 256 1.1 tron 257 1.3 maya #define XmlNameLength(enc, ptr) (((enc)->nameLength)(enc, ptr)) 258 1.1 tron 259 1.3 maya #define XmlSkipS(enc, ptr) (((enc)->skipS)(enc, ptr)) 260 1.1 tron 261 1.3 maya #define XmlGetAttributes(enc, ptr, attsMax, atts) \ 262 1.1 tron (((enc)->getAtts)(enc, ptr, attsMax, atts)) 263 1.1 tron 264 1.3 maya #define XmlCharRefNumber(enc, ptr) (((enc)->charRefNumber)(enc, ptr)) 265 1.1 tron 266 1.3 maya #define XmlPredefinedEntityName(enc, ptr, end) \ 267 1.1 tron (((enc)->predefinedEntityName)(enc, ptr, end)) 268 1.1 tron 269 1.3 maya #define XmlUpdatePosition(enc, ptr, end, pos) \ 270 1.1 tron (((enc)->updatePosition)(enc, ptr, end, pos)) 271 1.1 tron 272 1.3 maya #define XmlIsPublicId(enc, ptr, end, badPtr) \ 273 1.1 tron (((enc)->isPublicId)(enc, ptr, end, badPtr)) 274 1.1 tron 275 1.3 maya #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ 276 1.1 tron (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) 277 1.1 tron 278 1.3 maya #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ 279 1.1 tron (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) 280 1.1 tron 281 1.1 tron typedef struct { 282 1.1 tron ENCODING initEnc; 283 1.1 tron const ENCODING **encPtr; 284 1.1 tron } INIT_ENCODING; 285 1.1 tron 286 1.3 maya int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, 287 1.3 maya const char *ptr, const char *end, const char **badPtr, 288 1.3 maya const char **versionPtr, const char **versionEndPtr, 289 1.1 tron const char **encodingNamePtr, 290 1.3 maya const ENCODING **namedEncodingPtr, int *standalonePtr); 291 1.1 tron 292 1.5 wiz int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, 293 1.5 wiz const char *name); 294 1.1 tron const ENCODING *XmlGetUtf8InternalEncoding(void); 295 1.1 tron const ENCODING *XmlGetUtf16InternalEncoding(void); 296 1.1 tron int FASTCALL XmlUtf8Encode(int charNumber, char *buf); 297 1.1 tron int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); 298 1.1 tron int XmlSizeOfUnknownEncoding(void); 299 1.1 tron 300 1.3 maya typedef int(XMLCALL *CONVERTER)(void *userData, const char *p); 301 1.1 tron 302 1.3 maya ENCODING *XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, 303 1.3 maya void *userData); 304 1.1 tron 305 1.3 maya int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, 306 1.3 maya const char *ptr, const char *end, const char **badPtr, 307 1.3 maya const char **versionPtr, const char **versionEndPtr, 308 1.1 tron const char **encodingNamePtr, 309 1.3 maya const ENCODING **namedEncodingPtr, int *standalonePtr); 310 1.1 tron 311 1.5 wiz int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr, 312 1.5 wiz const char *name); 313 1.1 tron const ENCODING *XmlGetUtf8InternalEncodingNS(void); 314 1.1 tron const ENCODING *XmlGetUtf16InternalEncodingNS(void); 315 1.3 maya ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, 316 1.3 maya void *userData); 317 1.1 tron #ifdef __cplusplus 318 1.1 tron } 319 1.1 tron #endif 320 1.1 tron 321 1.1 tron #endif /* not XmlTok_INCLUDED */ 322