xmlparse.c revision 1.1.1.8 1 /* 7d6840a33c250b74adb0ba295d6ec818dccebebaffc8c3ed27d0b29c28adbeb3 (2.7.0+)
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc (at) users.sourceforge.net>
11 Copyright (c) 2000-2006 Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net>
12 Copyright (c) 2001-2002 Greg Stein <gstein (at) users.sourceforge.net>
13 Copyright (c) 2002-2016 Karl Waclawek <karl (at) waclawek.net>
14 Copyright (c) 2005-2009 Steven Solie <steven (at) solie.ca>
15 Copyright (c) 2016 Eric Rahm <erahm (at) mozilla.com>
16 Copyright (c) 2016-2025 Sebastian Pipping <sebastian (at) pipping.org>
17 Copyright (c) 2016 Gaurav <g.gupta (at) samsung.com>
18 Copyright (c) 2016 Thomas Beutlich <tc (at) tbeu.de>
19 Copyright (c) 2016 Gustavo Grieco <gustavo.grieco (at) imag.fr>
20 Copyright (c) 2016 Pascal Cuoq <cuoq (at) trust-in-soft.com>
21 Copyright (c) 2016 Ed Schouten <ed (at) nuxi.nl>
22 Copyright (c) 2017-2022 Rhodri James <rhodri (at) wildebeest.org.uk>
23 Copyright (c) 2017 Václav Slavík <vaclav (at) slavik.io>
24 Copyright (c) 2017 Viktor Szakats <commit (at) vsz.me>
25 Copyright (c) 2017 Chanho Park <chanho61.park (at) samsung.com>
26 Copyright (c) 2017 Rolf Eike Beer <eike (at) sf-mail.de>
27 Copyright (c) 2017 Hans Wennborg <hans (at) chromium.org>
28 Copyright (c) 2018 Anton Maklakov <antmak.pub (at) gmail.com>
29 Copyright (c) 2018 Benjamin Peterson <benjamin (at) python.org>
30 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu (at) poste.it>
31 Copyright (c) 2018 Mariusz Zaborski <oshogbo (at) vexillium.org>
32 Copyright (c) 2019 David Loffredo <loffredo (at) steptools.com>
33 Copyright (c) 2019-2020 Ben Wagner <bungeman (at) chromium.org>
34 Copyright (c) 2019 Vadim Zeitlin <vadim (at) zeitlins.org>
35 Copyright (c) 2021 Donghee Na <donghee.na (at) python.org>
36 Copyright (c) 2022 Samanta Navarro <ferivoz (at) riseup.net>
37 Copyright (c) 2022 Jeffrey Walton <noloader (at) gmail.com>
38 Copyright (c) 2022 Jann Horn <jannh (at) google.com>
39 Copyright (c) 2022 Sean McBride <sean (at) rogue-research.com>
40 Copyright (c) 2023 Owain Davies <owaind (at) bath.edu>
41 Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild (at) sony.com>
42 Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen (at) siemens.com>
43 Copyright (c) 2024 Hanno Böck <hanno (at) gentoo.org>
44 Licensed under the MIT license:
45
46 Permission is hereby granted, free of charge, to any person obtaining
47 a copy of this software and associated documentation files (the
48 "Software"), to deal in the Software without restriction, including
49 without limitation the rights to use, copy, modify, merge, publish,
50 distribute, sublicense, and/or sell copies of the Software, and to permit
51 persons to whom the Software is furnished to do so, subject to the
52 following conditions:
53
54 The above copyright notice and this permission notice shall be included
55 in all copies or substantial portions of the Software.
56
57 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
58 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
59 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
60 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
61 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
62 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
63 USE OR OTHER DEALINGS IN THE SOFTWARE.
64 */
65
66 #define XML_BUILDING_EXPAT 1
67
68 #include "expat_config.h"
69
70 #if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
71 # error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
72 #endif
73
74 #if defined(XML_DTD) && XML_GE == 0
75 # error Either undefine XML_DTD or define XML_GE to 1.
76 #endif
77
78 #if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
79 || (XML_CONTEXT_BYTES + 0 < 0)
80 # error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
81 #endif
82
83 #if defined(HAVE_SYSCALL_GETRANDOM)
84 # if ! defined(_GNU_SOURCE)
85 # define _GNU_SOURCE 1 /* syscall prototype */
86 # endif
87 #endif
88
89 #ifdef _WIN32
90 /* force stdlib to define rand_s() */
91 # if ! defined(_CRT_RAND_S)
92 # define _CRT_RAND_S
93 # endif
94 #endif
95
96 #include <stdbool.h>
97 #include <stddef.h>
98 #include <string.h> /* memset(), memcpy() */
99 #include <assert.h>
100 #include <limits.h> /* UINT_MAX */
101 #include <stdio.h> /* fprintf */
102 #include <stdlib.h> /* getenv, rand_s */
103 #include <stdint.h> /* uintptr_t */
104 #include <math.h> /* isnan */
105
106 #ifdef _WIN32
107 # define getpid GetCurrentProcessId
108 #else
109 # include <sys/time.h> /* gettimeofday() */
110 # include <sys/types.h> /* getpid() */
111 # include <unistd.h> /* getpid() */
112 # include <fcntl.h> /* O_RDONLY */
113 # include <errno.h>
114 #endif
115
116 #ifdef _WIN32
117 # include "winconfig.h"
118 #endif
119
120 #include "ascii.h"
121 #include "expat.h"
122 #include "siphash.h"
123
124 #if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
125 # if defined(HAVE_GETRANDOM)
126 # include <sys/random.h> /* getrandom */
127 # else
128 # include <unistd.h> /* syscall */
129 # include <sys/syscall.h> /* SYS_getrandom */
130 # endif
131 # if ! defined(GRND_NONBLOCK)
132 # define GRND_NONBLOCK 0x0001
133 # endif /* defined(GRND_NONBLOCK) */
134 #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
135
136 #if defined(HAVE_LIBBSD) \
137 && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM))
138 # include <bsd/stdlib.h>
139 #endif
140
141 #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32)
142 # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800
143 #endif
144
145 #if ! defined(HAVE_GETRANDOM) && ! defined(HAVE_SYSCALL_GETRANDOM) \
146 && ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) \
147 && ! defined(XML_DEV_URANDOM) && ! defined(_WIN32) \
148 && ! defined(XML_POOR_ENTROPY)
149 # error You do not have support for any sources of high quality entropy \
150 enabled. For end user security, that is probably not what you want. \
151 \
152 Your options include: \
153 * Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
154 * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
155 * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
156 * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
157 * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
158 * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
159 * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
160 * Windows >=Vista (rand_s): _WIN32. \
161 \
162 If insist on not using any of these, bypass this error by defining \
163 XML_POOR_ENTROPY; you have been warned. \
164 \
165 If you have reasons to patch this detection code away or need changes \
166 to the build system, please open a bug. Thank you!
167 #endif
168
/* Select the internal-encoding entry points and the internal character type
   ICHAR: the UTF-16 variants when XML_UNICODE is defined, the UTF-8 variants
   otherwise. */
169 #ifdef XML_UNICODE
170 # define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
171 # define XmlConvert XmlUtf16Convert
172 # define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
173 # define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
174 # define XmlEncode XmlUtf16Encode
/* Conversion is needed unless the encoding is UTF-16 and s sits at an even
   address. */
175 # define MUST_CONVERT(enc, s) (! (enc)->isUtf16 || (((uintptr_t)(s)) & 1))
176 typedef unsigned short ICHAR;
177 #else
178 # define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
179 # define XmlConvert XmlUtf8Convert
180 # define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
181 # define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
182 # define XmlEncode XmlUtf8Encode
/* Conversion is needed unless the encoding is already UTF-8. */
183 # define MUST_CONVERT(enc, s) (! (enc)->isUtf8)
184 typedef char ICHAR;
185 #endif
186
/* Without XML_NS, the *NS encoding entry points are aliased to their
   non-namespace counterparts. */
187 #ifndef XML_NS
188
189 # define XmlInitEncodingNS XmlInitEncoding
190 # define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
191 # undef XmlGetInternalEncodingNS
192 # define XmlGetInternalEncodingNS XmlGetInternalEncoding
193 # define XmlParseXmlDeclNS XmlParseXmlDecl
194
195 #endif
196
/* XML_T(x) and XML_L(x) adapt literals to the configured XML_Char type:
   with XML_UNICODE_WCHAR_T, XML_T casts to wchar_t and XML_L pastes an L
   prefix; with plain XML_UNICODE, XML_T casts to unsigned short; otherwise
   both expand to x unchanged. */
197 #ifdef XML_UNICODE
198
199 # ifdef XML_UNICODE_WCHAR_T
200 # define XML_T(x) (const wchar_t) x
201 # define XML_L(x) L##x
202 # else
203 # define XML_T(x) (const unsigned short)x
204 # define XML_L(x) x
205 # endif
206
207 #else
208
209 # define XML_T(x) x
210 # define XML_L(x) x
211
212 #endif
213
214 /* Round up n to be a multiple of sz, where sz is a power of 2. */
/* Note: sz is expanded twice, so pass an expression without side effects. */
215 #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
216
217 /* Do safe (NULL-aware) pointer arithmetic */
/* Yields 0 when either pointer is NULL, otherwise the difference p - q.
   Both arguments are expanded twice. */
218 #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
219
/* Smaller of a and b; the selected argument is evaluated a second time. */
220 #define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
221
222 #include "internal.h"
223 #include "xmltok.h"
224 #include "xmlrole.h"
225
/* Key type of the hash tables: a pointer to constant XML_Char data. */
226 typedef const XML_Char *KEY;
227
/* Smallest hash table entry: it holds only the key. */
228 typedef struct {
229 KEY name;
230 } NAMED;
231
/* Hash table whose slot array v holds NAMED pointers.  Per the probing note
   below, size is a power of 2.  NOTE(review): power is presumably log2(size)
   and used the number of occupied slots -- confirm against lookup(). */
232 typedef struct {
233 NAMED **v;
234 unsigned char power;
235 size_t size;
236 size_t used;
237 const XML_Memory_Handling_Suite *mem;
238 } HASH_TABLE;
239
240 static size_t keylen(KEY s);
241
242 static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key);
243
244 /* For probing (after a collision) we need a step size relative prime
245 to the hash table size, which is a power of 2. We use double-hashing,
246 since we can calculate a second hash value cheaply by taking those bits
247 of the first hash value that were discarded (masked out) when the table
248 index was calculated: index = hash & mask, where mask = table->size - 1.
249 We limit the maximum step size to table->size / 4 (mask >> 2) and make
250 it odd, since odd numbers are always relative prime to a power of 2.
251 */
/* SECOND_HASH keeps the hash bits above the mask, shifts them down by
   (power - 1) and limits the result to mask >> 2. */
252 #define SECOND_HASH(hash, mask, power) \
253 ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2))
/* PROBE_STEP forces the low bit on (odd step) and narrows the value to
   unsigned char. */
254 #define PROBE_STEP(hash, mask, power) \
255 ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1))
256
/* Cursor over a hash table's slots.  NOTE(review): p and end presumably
   delimit the slots still to be visited -- confirm in hashTableIterInit(). */
257 typedef struct {
258 NAMED **p;
259 NAMED **end;
260 } HASH_TABLE_ITER;
261
/* Initial-size and tuning constants.  NOTE(review): units (bytes, XML_Chars
   or element counts) are not visible here -- check each point of use. */
262 #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
263 #define INIT_DATA_BUF_SIZE 1024
264 #define INIT_ATTS_SIZE 16
265 #define INIT_ATTS_VERSION 0xFFFFFFFF
266 #define INIT_BLOCK_SIZE 1024
267 #define INIT_BUFFER_SIZE 1024
268
269 #define EXPAND_SPARE 24
270
/* One binding record.  It points back to its PREFIX, carries two list links
   (nextTagBinding, prevPrefixBinding) and owns a mutable uri buffer.
   NOTE(review): presumably a namespace prefix-to-URI binding, with uriLen and
   uriAlloc the used and allocated lengths of uri -- confirm in addBinding(). */
271 typedef struct binding {
272 struct prefix *prefix;
273 struct binding *nextTagBinding;
274 struct binding *prevPrefixBinding;
275 const struct attribute_id *attId;
276 XML_Char *uri;
277 int uriLen;
278 int uriAlloc;
279 } BINDING;
280
/* A named prefix together with a pointer to a BINDING (presumably the one
   currently in effect -- confirm). */
281 typedef struct prefix {
282 const XML_Char *name;
283 BINDING *binding;
284 } PREFIX;
285
/* An element name: the full string plus pointers to its local part and
   prefix, with three length fields.  NOTE(review): lengths are presumably
   counted in XML_Chars -- confirm where they are filled in. */
286 typedef struct {
287 const XML_Char *str;
288 const XML_Char *localPart;
289 const XML_Char *prefix;
290 int strLen;
291 int uriLen;
292 int prefixLen;
293 } TAG_NAME;
294
295 /* TAG represents an open element.
296 The name of the element is stored in both the document and API
297 encodings. The memory buffer 'buf' is a separately-allocated
298 memory area which stores the name. During the XML_Parse()/
299 XML_ParseBuffer() when the element is open, the memory for the 'raw'
300 version of the name (in the document encoding) is shared with the
301 document buffer. If the element is open across calls to
302 XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
303 contain the 'raw' name as well.
304
305 A parser reuses these structures, maintaining a list of allocated
306 TAG objects in a free list.
307 */
/* NOTE(review): rawNameLength is presumably a byte count in the document
   encoding, and bindings the BINDING list attached to this element (linked
   via nextTagBinding) -- confirm against the tag handling code. */
308 typedef struct tag {
309 struct tag *parent; /* parent of this element */
310 const char *rawName; /* tagName in the original encoding */
311 int rawNameLength;
312 TAG_NAME name; /* tagName in the API encoding */
313 char *buf; /* buffer for name components */
314 char *bufEnd; /* end of the buffer */
315 BINDING *bindings;
316 } TAG;
317
/* Record for one declared entity.  is_param distinguishes parameter entities;
   textPtr/textLen hold the replacement text, while systemId, base, publicId
   and notation carry the external identification (presumably NULL when not
   applicable -- confirm). */
318 typedef struct {
319 const XML_Char *name;
320 const XML_Char *textPtr;
321 int textLen; /* length in XML_Chars */
322 int processed; /* # of processed bytes - when suspended */
323 const XML_Char *systemId;
324 const XML_Char *base;
325 const XML_Char *publicId;
326 const XML_Char *notation;
327 XML_Bool open;
328 XML_Bool hasMore; /* true if entity has not been completely processed */
329 /* An entity can be open while being already completely processed (hasMore ==
330 XML_FALSE). The reason is the delayed closing of entities until their inner
331 entities are processed and closed */
332 XML_Bool is_param;
333 XML_Bool is_internal; /* true if declared in internal subset outside PE */
334 } ENTITY;
335
/* One node of the scaffold used while building a content model (see the
   scaffolding fields of DTD).  NOTE(review): firstchild, lastchild and nextsib
   are presumably indices into the DTD's scaffold array -- confirm in
   nextScaffoldPart()/build_model(). */
336 typedef struct {
337 enum XML_Content_Type type;
338 enum XML_Content_Quant quant;
339 const XML_Char *name;
340 int firstchild;
341 int lastchild;
342 int childcnt;
343 int nextsib;
344 } CONTENT_SCAFFOLD;
345
/* NOTE(review): presumably the initial number of scaffold elements
   allocated -- confirm at the point of use. */
346 #define INIT_SCAFFOLD_ELEMENTS 32
347
/* One chunk of XML_Char storage in a singly linked list.  s is declared with
   a single element; NOTE(review): it is presumably the start of a larger
   trailing allocation described by size -- confirm in poolGrow(). */
348 typedef struct block {
349 struct block *next;
350 int size;
351 XML_Char s[1];
352 } BLOCK;
353
/* String pool built from BLOCKs.  As used by the pool* macros in this file:
   start marks the beginning of the string under construction, ptr the next
   write position, and end the limit at which poolAppendChar() calls
   poolGrow().  blocks and freeBlocks are two BLOCK lists (presumably in-use
   and recycled -- confirm). */
354 typedef struct {
355 BLOCK *blocks;
356 BLOCK *freeBlocks;
357 const XML_Char *end;
358 XML_Char *ptr;
359 XML_Char *start;
360 const XML_Memory_Handling_Suite *mem;
361 } STRING_POOL;
362
363 /* The XML_Char before the name is used to determine whether
364 an attribute has been specified. */
365 typedef struct attribute_id {
366 XML_Char *name;
367 PREFIX *prefix;
368 XML_Bool maybeTokenized;
369 XML_Bool xmlns;
370 } ATTRIBUTE_ID;
371
/* A default for one attribute: the attribute's id, whether it is CDATA, and
   the default value string. */
372 typedef struct {
373 const ATTRIBUTE_ID *id;
374 XML_Bool isCdata;
375 const XML_Char *value;
376 } DEFAULT_ATTRIBUTE;
377
/* Entry of the parser's m_nsAtts table.  NOTE(review): version is presumably
   compared with m_nsAttsVersion to tell live entries from stale ones --
   confirm in storeAtts(). */
378 typedef struct {
379 unsigned long version;
380 unsigned long hash;
381 const XML_Char *uriName;
382 } NS_ATT;
383
/* Per-element-type record: name, prefix, an optional id attribute and an
   array of default attributes (nDefaultAtts entries; allocDefaultAtts is
   presumably the allocated capacity -- confirm in defineAttribute()). */
384 typedef struct {
385 const XML_Char *name;
386 PREFIX *prefix;
387 const ATTRIBUTE_ID *idAtt;
388 int nDefaultAtts;
389 int allocDefaultAtts;
390 DEFAULT_ATTRIBUTE *defaultAtts;
391 } ELEMENT_TYPE;
392
/* Everything the parser records about a document type: hash tables for
   general entities, element types, attribute ids and prefixes (plus parameter
   entities when XML_DTD is defined), two string pools, processing flags, and
   the scaffolding used while an element declaration's content model is being
   built. */
393 typedef struct {
394 HASH_TABLE generalEntities;
395 HASH_TABLE elementTypes;
396 HASH_TABLE attributeIds;
397 HASH_TABLE prefixes;
398 STRING_POOL pool;
399 STRING_POOL entityValuePool;
400 /* false once a parameter entity reference has been skipped */
401 XML_Bool keepProcessing;
402 /* true once an internal or external PE reference has been encountered;
403 this includes the reference to an external subset */
404 XML_Bool hasParamEntityRefs;
405 XML_Bool standalone;
406 #ifdef XML_DTD
407 /* indicates if external PE has been read */
408 XML_Bool paramEntityRead;
409 HASH_TABLE paramEntities;
410 #endif /* XML_DTD */
411 PREFIX defaultPrefix;
412 /* === scaffolding for building content model === */
413 XML_Bool in_eldecl;
414 CONTENT_SCAFFOLD *scaffold;
415 unsigned contentStringLen;
416 unsigned scaffSize;
417 unsigned scaffCount;
418 int scaffLevel;
419 int *scaffIndex;
420 } DTD;
421
/* Kind tag stored in OPEN_INTERNAL_ENTITY.type.  The three kinds parallel the
   parser's m_openInternalEntities, m_openAttributeEntities and
   m_openValueEntities lists. */
422 enum EntityType {
423 ENTITY_INTERNAL,
424 ENTITY_ATTRIBUTE,
425 ENTITY_VALUE,
426 };
427
/* List node for an entity that is currently open: the entity, saved event
   pointers, the start tag level and the link to the next node. */
428 typedef struct open_internal_entity {
429 const char *internalEventPtr;
430 const char *internalEventEndPtr;
431 struct open_internal_entity *next;
432 ENTITY *entity;
433 int startTagLevel;
434 XML_Bool betweenDecl; /* WFC: PE Between Declarations */
435 enum EntityType type;
436 } OPEN_INTERNAL_ENTITY;
437
/* Tells the accounting code how a span of bytes is to be counted. */
438 enum XML_Account {
439 XML_ACCOUNT_DIRECT, /* bytes directly passed to the Expat parser */
440 XML_ACCOUNT_ENTITY_EXPANSION, /* intermediate bytes produced during entity
441 expansion */
442 XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
443 };
444
/* Byte accounting and entity statistics; compiled only when general entity
   support is enabled (XML_GE == 1). */
445 #if XML_GE == 1
446 typedef unsigned long long XmlBigCount;
/* Direct and indirect byte counters together with the limits they are checked
   against (maximum amplification factor, activation threshold in bytes). */
447 typedef struct accounting {
448 XmlBigCount countBytesDirect;
449 XmlBigCount countBytesIndirect;
450 unsigned long debugLevel;
451 float maximumAmplificationFactor; // >=1.0
452 unsigned long long activationThresholdBytes;
453 } ACCOUNTING;
454
/* Counters describing entity opening: how many were ever opened, the current
   nesting depth and the greatest depth seen. */
455 typedef struct entity_stats {
456 unsigned int countEverOpened;
457 unsigned int currentDepth;
458 unsigned int maximumDepthSeen;
459 unsigned long debugLevel;
460 } ENTITY_STATS;
461 #endif /* XML_GE == 1 */
462
463 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
464 const char *end, const char **endPtr);
465
466 static Processor prologProcessor;
467 static Processor prologInitProcessor;
468 static Processor contentProcessor;
469 static Processor cdataSectionProcessor;
470 #ifdef XML_DTD
471 static Processor ignoreSectionProcessor;
472 static Processor externalParEntProcessor;
473 static Processor externalParEntInitProcessor;
474 static Processor entityValueProcessor;
475 static Processor entityValueInitProcessor;
476 #endif /* XML_DTD */
477 static Processor epilogProcessor;
478 static Processor errorProcessor;
479 static Processor externalEntityInitProcessor;
480 static Processor externalEntityInitProcessor2;
481 static Processor externalEntityInitProcessor3;
482 static Processor externalEntityContentProcessor;
483 static Processor internalEntityProcessor;
484
485 static enum XML_Error handleUnknownEncoding(XML_Parser parser,
486 const XML_Char *encodingName);
487 static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
488 const char *s, const char *next);
489 static enum XML_Error initializeEncoding(XML_Parser parser);
490 static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc,
491 const char *s, const char *end, int tok,
492 const char *next, const char **nextPtr,
493 XML_Bool haveMore, XML_Bool allowClosingDoctype,
494 enum XML_Account account);
495 static enum XML_Error processEntity(XML_Parser parser, ENTITY *entity,
496 XML_Bool betweenDecl, enum EntityType type);
497 static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
498 const ENCODING *enc, const char *start,
499 const char *end, const char **endPtr,
500 XML_Bool haveMore, enum XML_Account account);
501 static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
502 const char **startPtr, const char *end,
503 const char **nextPtr, XML_Bool haveMore,
504 enum XML_Account account);
505 #ifdef XML_DTD
506 static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
507 const char **startPtr, const char *end,
508 const char **nextPtr, XML_Bool haveMore);
509 #endif /* XML_DTD */
510
511 static void freeBindings(XML_Parser parser, BINDING *bindings);
512 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
513 const char *attStr, TAG_NAME *tagNamePtr,
514 BINDING **bindingsPtr,
515 enum XML_Account account);
516 static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
517 const ATTRIBUTE_ID *attId, const XML_Char *uri,
518 BINDING **bindingsPtr);
519 static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
520 XML_Bool isCdata, XML_Bool isId,
521 const XML_Char *value, XML_Parser parser);
522 static enum XML_Error storeAttributeValue(XML_Parser parser,
523 const ENCODING *enc, XML_Bool isCdata,
524 const char *ptr, const char *end,
525 STRING_POOL *pool,
526 enum XML_Account account);
527 static enum XML_Error
528 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
529 const char *ptr, const char *end, STRING_POOL *pool,
530 enum XML_Account account, const char **nextPtr);
531 static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
532 const char *start, const char *end);
533 static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
534 #if XML_GE == 1
535 static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
536 const char *start, const char *end,
537 enum XML_Account account,
538 const char **nextPtr);
539 static enum XML_Error callStoreEntityValue(XML_Parser parser,
540 const ENCODING *enc,
541 const char *start, const char *end,
542 enum XML_Account account);
543 #else
544 static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
545 #endif
546 static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
547 const char *start, const char *end);
548 static int reportComment(XML_Parser parser, const ENCODING *enc,
549 const char *start, const char *end);
550 static void reportDefault(XML_Parser parser, const ENCODING *enc,
551 const char *start, const char *end);
552
553 static const XML_Char *getContext(XML_Parser parser);
554 static XML_Bool setContext(XML_Parser parser, const XML_Char *context);
555
556 static void FASTCALL normalizePublicId(XML_Char *s);
557
558 static DTD *dtdCreate(const XML_Memory_Handling_Suite *ms);
559 /* do not call if m_parentParser != NULL */
560 static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms);
561 static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
562 const XML_Memory_Handling_Suite *ms);
563 static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
564 const XML_Memory_Handling_Suite *ms);
565 static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
566 STRING_POOL *newPool, const HASH_TABLE *oldTable);
567 static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
568 size_t createSize);
569 static void FASTCALL hashTableInit(HASH_TABLE *table,
570 const XML_Memory_Handling_Suite *ms);
571 static void FASTCALL hashTableClear(HASH_TABLE *table);
572 static void FASTCALL hashTableDestroy(HASH_TABLE *table);
573 static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
574 const HASH_TABLE *table);
575 static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
576
577 static void FASTCALL poolInit(STRING_POOL *pool,
578 const XML_Memory_Handling_Suite *ms);
579 static void FASTCALL poolClear(STRING_POOL *pool);
580 static void FASTCALL poolDestroy(STRING_POOL *pool);
581 static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
582 const char *ptr, const char *end);
583 static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
584 const char *ptr, const char *end);
585 static XML_Bool FASTCALL poolGrow(STRING_POOL *pool);
586 static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool,
587 const XML_Char *s);
588 static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s,
589 int n);
590 static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool,
591 const XML_Char *s);
592
593 static int FASTCALL nextScaffoldPart(XML_Parser parser);
594 static XML_Content *build_model(XML_Parser parser);
595 static ELEMENT_TYPE *getElementType(XML_Parser parser, const ENCODING *enc,
596 const char *ptr, const char *end);
597
598 static XML_Char *copyString(const XML_Char *s,
599 const XML_Memory_Handling_Suite *memsuite);
600
601 static unsigned long generate_hash_secret_salt(XML_Parser parser);
602 static XML_Bool startParsing(XML_Parser parser);
603
604 static XML_Parser parserCreate(const XML_Char *encodingName,
605 const XML_Memory_Handling_Suite *memsuite,
606 const XML_Char *nameSep, DTD *dtd);
607
608 static void parserInit(XML_Parser parser, const XML_Char *encodingName);
609
610 #if XML_GE == 1
611 static float accountingGetCurrentAmplification(XML_Parser rootParser);
612 static void accountingReportStats(XML_Parser originParser, const char *epilog);
613 static void accountingOnAbort(XML_Parser originParser);
614 static void accountingReportDiff(XML_Parser rootParser,
615 unsigned int levelsAwayFromRootParser,
616 const char *before, const char *after,
617 ptrdiff_t bytesMore, int source_line,
618 enum XML_Account account);
619 static XML_Bool accountingDiffTolerated(XML_Parser originParser, int tok,
620 const char *before, const char *after,
621 int source_line,
622 enum XML_Account account);
623
624 static void entityTrackingReportStats(XML_Parser parser, ENTITY *entity,
625 const char *action, int sourceLine);
626 static void entityTrackingOnOpen(XML_Parser parser, ENTITY *entity,
627 int sourceLine);
628 static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
629 int sourceLine);
630
631 static XML_Parser getRootParserOf(XML_Parser parser,
632 unsigned int *outLevelDiff);
633 #endif /* XML_GE == 1 */
634
635 static unsigned long getDebugLevel(const char *variableName,
636 unsigned long defaultDebugLevel);
637
/* Helper macros for STRING_POOL (pool is a STRING_POOL pointer expression).
 *
 * poolStart      - beginning of the string currently under construction
 * poolLength     - number of XML_Chars between start and the write position
 * poolChop       - drop the last appended character
 * poolLastChar   - lvalue of the most recently appended character
 * poolDiscard    - throw away the string under construction
 * poolFinish     - commit the string: the next string starts at ptr
 * poolAppendChar - append c, growing the pool via poolGrow() when it is full;
 *                  evaluates to 1 on success and 0 if growing failed
 *
 * Every macro argument is fully parenthesized so that arbitrary pointer
 * expressions (e.g. `pools + i`) expand correctly.  The pool argument may be
 * evaluated more than once, so it must be free of side effects.
 */
#define poolStart(pool) ((pool)->start)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c)                                                \
  (((pool)->ptr == (pool)->end && ! poolGrow(pool))                            \
       ? 0                                                                     \
       : ((*((pool)->ptr)++ = (c)), 1))
648
/* Default for reparse deferral.  The object is const unless XML_TESTING is
   defined, in which case it is writable. */
649 #if ! defined(XML_TESTING)
650 const
651 #endif
652 XML_Bool g_reparseDeferralEnabledDefault
653 = XML_TRUE; // write ONLY in runtests.c
/* Extra counter that only exists in XML_TESTING builds. */
654 #if defined(XML_TESTING)
655 unsigned int g_bytesScanned = 0; // used for testing only
656 #endif
657
/* Complete state of one parser instance: user data and handler argument, the
   parse buffer, all registered handlers, encoding state, the current
   processor and error code, open/free entity lists, declaration scratch
   state, the DTD, tag and binding lists, attribute storage, temporary string
   pools, parent-parser link, hash salt and (with XML_GE == 1) accounting. */
658 struct XML_ParserStruct {
659 /* The first member must be m_userData so that the XML_GetUserData
660 macro works. */
661 void *m_userData;
662 void *m_handlerArg;
663
664 // How the four parse buffer pointers below relate in time and space:
665 //
666 // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
667 // | | | |
668 // <--parsed-->| | |
669 // <---parsing--->| |
670 // <--unoccupied-->|
671 // <---------total-malloced/realloced-------->|
672
673 char *m_buffer; // malloc/realloc base pointer of parse buffer
/* Memory handling suite; the MALLOC/REALLOC/FREE macros call through it. */
674 const XML_Memory_Handling_Suite m_mem;
675 const char *m_bufferPtr; // first character to be parsed
676 char *m_bufferEnd; // past last character to be parsed
677 const char *m_bufferLim; // allocated end of m_buffer
678
679 XML_Index m_parseEndByteIndex;
680 const char *m_parseEndPtr;
681 size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
682 XML_Bool m_reparseDeferralEnabled;
683 int m_lastBufferRequestSize;
684 XML_Char *m_dataBuf;
685 XML_Char *m_dataBufEnd;
/* Registered application callbacks. */
686 XML_StartElementHandler m_startElementHandler;
687 XML_EndElementHandler m_endElementHandler;
688 XML_CharacterDataHandler m_characterDataHandler;
689 XML_ProcessingInstructionHandler m_processingInstructionHandler;
690 XML_CommentHandler m_commentHandler;
691 XML_StartCdataSectionHandler m_startCdataSectionHandler;
692 XML_EndCdataSectionHandler m_endCdataSectionHandler;
693 XML_DefaultHandler m_defaultHandler;
694 XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
695 XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
696 XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
697 XML_NotationDeclHandler m_notationDeclHandler;
698 XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
699 XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
700 XML_NotStandaloneHandler m_notStandaloneHandler;
701 XML_ExternalEntityRefHandler m_externalEntityRefHandler;
702 XML_Parser m_externalEntityRefHandlerArg;
703 XML_SkippedEntityHandler m_skippedEntityHandler;
704 XML_UnknownEncodingHandler m_unknownEncodingHandler;
705 XML_ElementDeclHandler m_elementDeclHandler;
706 XML_AttlistDeclHandler m_attlistDeclHandler;
707 XML_EntityDeclHandler m_entityDeclHandler;
708 XML_XmlDeclHandler m_xmlDeclHandler;
/* Encoding state. */
709 const ENCODING *m_encoding;
710 INIT_ENCODING m_initEncoding;
711 const ENCODING *m_internalEncoding;
712 const XML_Char *m_protocolEncodingName;
713 XML_Bool m_ns;
714 XML_Bool m_ns_triplets;
715 void *m_unknownEncodingMem;
716 void *m_unknownEncodingData;
717 void *m_unknownEncodingHandlerData;
718 void(XMLCALL *m_unknownEncodingRelease)(void *);
719 PROLOG_STATE m_prologState;
/* Current processor function, last error code and event position pointers. */
720 Processor *m_processor;
721 enum XML_Error m_errorCode;
722 const char *m_eventPtr;
723 const char *m_eventEndPtr;
724 const char *m_positionPtr;
/* One open list and one free list per EntityType. */
725 OPEN_INTERNAL_ENTITY *m_openInternalEntities;
726 OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
727 OPEN_INTERNAL_ENTITY *m_openAttributeEntities;
728 OPEN_INTERNAL_ENTITY *m_freeAttributeEntities;
729 OPEN_INTERNAL_ENTITY *m_openValueEntities;
730 OPEN_INTERNAL_ENTITY *m_freeValueEntities;
731 XML_Bool m_defaultExpandInternalEntities;
732 int m_tagLevel;
/* Scratch state for the declaration currently being processed. */
733 ENTITY *m_declEntity;
734 const XML_Char *m_doctypeName;
735 const XML_Char *m_doctypeSysid;
736 const XML_Char *m_doctypePubid;
737 const XML_Char *m_declAttributeType;
738 const XML_Char *m_declNotationName;
739 const XML_Char *m_declNotationPublicId;
740 ELEMENT_TYPE *m_declElementType;
741 ATTRIBUTE_ID *m_declAttributeId;
742 XML_Bool m_declAttributeIsCdata;
743 XML_Bool m_declAttributeIsId;
744 DTD *m_dtd;
745 const XML_Char *m_curBase;
/* Tag and binding lists, each with a companion list whose name marks it as a
   free list. */
746 TAG *m_tagStack;
747 TAG *m_freeTagList;
748 BINDING *m_inheritedBindings;
749 BINDING *m_freeBindingList;
/* Attribute storage. */
750 int m_attsSize;
751 int m_nSpecifiedAtts;
752 int m_idAttIndex;
753 ATTRIBUTE *m_atts;
754 NS_ATT *m_nsAtts;
755 unsigned long m_nsAttsVersion;
756 unsigned char m_nsAttsPower;
757 #ifdef XML_ATTR_INFO
758 XML_AttrInfo *m_attInfo;
759 #endif
760 POSITION m_position;
761 STRING_POOL m_tempPool;
762 STRING_POOL m_temp2Pool;
763 char *m_groupConnector;
764 unsigned int m_groupSize;
765 XML_Char m_namespaceSeparator;
766 XML_Parser m_parentParser;
767 XML_ParsingStatus m_parsingStatus;
768 #ifdef XML_DTD
769 XML_Bool m_isParamEntity;
770 XML_Bool m_useForeignDTD;
771 enum XML_ParamEntityParsing m_paramEntityParsing;
772 #endif
773 unsigned long m_hash_secret_salt;
774 #if XML_GE == 1
775 ACCOUNTING m_accounting;
776 ENTITY_STATS m_entity_stats;
777 #endif
778 XML_Bool m_reenter;
779 };
780
/* Allocation helpers that route through the parser's memory handling suite
 * (parser->m_mem).
 *
 * MALLOC(parser, s)     - allocate s bytes
 * REALLOC(parser, p, s) - resize the allocation p to s bytes
 * FREE(parser, p)       - release p
 *
 * The parser argument is parenthesized so that any pointer expression
 * (not just a plain identifier) expands correctly.
 */
#define MALLOC(parser, s) ((parser)->m_mem.malloc_fcn((s)))
#define REALLOC(parser, p, s) ((parser)->m_mem.realloc_fcn((p), (s)))
#define FREE(parser, p) ((parser)->m_mem.free_fcn((p)))
784
785 XML_Parser XMLCALL
786 XML_ParserCreate(const XML_Char *encodingName) {
787 return XML_ParserCreate_MM(encodingName, NULL, NULL);
788 }
789
790 XML_Parser XMLCALL
791 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
792 XML_Char tmp[2] = {nsSep, 0};
793 return XML_ParserCreate_MM(encodingName, NULL, tmp);
794 }
795
796 // "xml=http://www.w3.org/XML/1998/namespace"
/* The string above, spelled out with the ASCII_* constants character by
   character and terminated with '\0'. */
797 static const XML_Char implicitContext[]
798 = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
799 ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
800 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD,
801 ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r,
802 ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
803 ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8,
804 ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
805 ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e,
806 '\0'};
807
808 /* To avoid warnings about unused functions: */
809 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
810
811 # if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
812
/* Obtain entropy on Linux 3.17+ using getrandom() (or the raw syscall) in
 * non-blocking mode.
 *
 * target: buffer that receives the random bytes
 * count:  number of bytes requested
 *
 * Returns 1 if all count bytes were written, 0 otherwise.
 *
 * The call is repeated after a short write and after a failure with EINTR.
 * errno is consulted only when the call actually failed, because its value
 * is not meaningful after a successful call.
 */
static int
writeRandomBytes_getrandom_nonblock(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;
  const unsigned int getrandomFlags = GRND_NONBLOCK;
  int bytesWrittenMore;

  do {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    bytesWrittenMore =
#  if defined(HAVE_GETRANDOM)
        getrandom(currentTarget, bytesToWrite, getrandomFlags);
#  else
        syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags);
#  endif

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
      }
    }
    /* Retry on partial progress or on an interrupted call; stop on a zero
       return or on any other error (e.g. EAGAIN from GRND_NONBLOCK). */
  } while (! success
           && (bytesWrittenMore > 0
               || (bytesWrittenMore < 0 && errno == EINTR)));

  return success;
}
840
841 # endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */
842
843 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
844
/* Extract entropy from /dev/urandom.
 *
 * target: buffer that receives the random bytes
 * count:  number of bytes requested
 *
 * Returns 1 if all count bytes were read, 0 otherwise (including when the
 * device cannot be opened).
 *
 * read() is repeated after a short read and after a failure with EINTR.
 * errno is consulted only when read() actually failed, because its value is
 * not meaningful after a successful call; a stale EINTR must not keep the
 * loop spinning, and a short read must not abort it.
 */
static int
writeRandomBytes_dev_urandom(void *target, size_t count) {
  int success = 0; /* full count bytes written? */
  size_t bytesWrittenTotal = 0;
  ssize_t bytesWrittenMore;

  const int fd = open("/dev/urandom", O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  do {
    void *const currentTarget = (void *)((char *)target + bytesWrittenTotal);
    const size_t bytesToWrite = count - bytesWrittenTotal;

    bytesWrittenMore = read(fd, currentTarget, bytesToWrite);

    if (bytesWrittenMore > 0) {
      bytesWrittenTotal += (size_t)bytesWrittenMore;
      if (bytesWrittenTotal >= count) {
        success = 1;
      }
    }
    /* Retry on partial progress or on an interrupted call; stop on end of
       file (0) or on any other error. */
  } while (! success
           && (bytesWrittenMore > 0
               || (bytesWrittenMore < 0 && errno == EINTR)));

  close(fd);
  return success;
}
872
873 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
874
875 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
876
877 #if defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF)
878
/* Fill target[0..count) with bytes taken from successive arc4random()
 * results.  Each 32-bit result is emitted least-significant byte first; a
 * final partial word contributes only as many bytes as are still needed.
 */
static void
writeRandomBytes_arc4random(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    const uint32_t word = arc4random();
    size_t byteIndex;

    for (byteIndex = 0; byteIndex < sizeof(word); byteIndex++) {
      if (filled >= count) {
        break;
      }
      out[filled] = (uint8_t)(word >> (byteIndex * 8));
      filled++;
    }
  }
}
894
895 #endif /* defined(HAVE_ARC4RANDOM) && ! defined(HAVE_ARC4RANDOM_BUF) */
896
897 #ifdef _WIN32
898
899 /* Provide declaration of rand_s() for MinGW-32 (not 64, which has it),
900 as it didn't declare it in its header prior to version 5.3.0 of its
901 runtime package (mingwrt, containing stdlib.h). The upstream fix
902 was introduced at https://osdn.net/projects/mingw/ticket/39658 . */
903 # if defined(__MINGW32__) && defined(__MINGW32_VERSION) \
904 && __MINGW32_VERSION < 5003000L && ! defined(__MINGW64_VERSION_MAJOR)
905 __declspec(dllimport) int rand_s(unsigned int *);
906 # endif
907
/* Obtain entropy on Windows using the rand_s() function which
 * generates cryptographically secure random numbers.  Internally it
 * uses RtlGenRandom API which is present in Windows XP and later.
 *
 * Fills target[0..count), least-significant byte of each rand_s() result
 * first.  Returns 1 on success, or 0 as soon as rand_s() reports an error
 * (a non-zero return value).
 */
static int
writeRandomBytes_rand_s(void *target, size_t count) {
  uint8_t *const out = (uint8_t *)target;
  size_t filled = 0;

  while (filled < count) {
    unsigned int word = 0;
    size_t byteIndex;

    if (rand_s(&word) != 0) {
      return 0; /* failure */
    }

    for (byteIndex = 0; byteIndex < sizeof(word); byteIndex++) {
      if (filled >= count) {
        break;
      }
      out[filled] = (uint8_t)(word >> (byteIndex * 8));
      filled++;
    }
  }
  return 1; /* success */
}
931
932 #endif /* _WIN32 */
933
934 #if ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM)
935
/* Low-quality, time-derived entropy for the fallback path of
 * generate_hash_secret_salt().
 *
 * Windows: XOR of the two halves of the current system FILETIME.
 * Elsewhere: the microsecond field of gettimeofday().
 */
static unsigned long
gather_time_entropy(void) {
#  ifdef _WIN32
  FILETIME now;
  GetSystemTimeAsFileTime(&now); /* never fails */
  return now.dwHighDateTime ^ now.dwLowDateTime;
#  else
  struct timeval now;
  const int rc = gettimeofday(&now, NULL);

  /* Checked in debug builds only; the cast keeps NDEBUG builds free of an
     "unused variable" warning. */
  assert(rc == 0);
  (void)rc;

  /* Microseconds time is <20 bits entropy */
  return now.tv_usec;
#  endif
}
958
959 #endif /* ! defined(HAVE_ARC4RANDOM_BUF) && ! defined(HAVE_ARC4RANDOM) */
960
/* Pass-through helper: returns entropy unchanged.  When the debug level
 * obtained via getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) is at least 1, it
 * first reports the provider label and the value on stderr.
 */
static unsigned long
ENTROPY_DEBUG(const char *label, unsigned long entropy) {
  if (getDebugLevel("EXPAT_ENTROPY_DEBUG", 0) < 1u) {
    return entropy;
  }

  fprintf(stderr, "expat: Entropy: %s --> 0x%0*lx (%lu bytes)\n", label,
          (int)sizeof(entropy) * 2, entropy, (unsigned long)sizeof(entropy));
  return entropy;
}
969
970 static unsigned long
971 generate_hash_secret_salt(XML_Parser parser) {
972 unsigned long entropy;
973 (void)parser;
974
975 /* "Failproof" high quality providers: */
976 #if defined(HAVE_ARC4RANDOM_BUF)
977 arc4random_buf(&entropy, sizeof(entropy));
978 return ENTROPY_DEBUG("arc4random_buf", entropy);
979 #elif defined(HAVE_ARC4RANDOM)
980 writeRandomBytes_arc4random((void *)&entropy, sizeof(entropy));
981 return ENTROPY_DEBUG("arc4random", entropy);
982 #else
983 /* Try high quality providers first .. */
984 # ifdef _WIN32
985 if (writeRandomBytes_rand_s((void *)&entropy, sizeof(entropy))) {
986 return ENTROPY_DEBUG("rand_s", entropy);
987 }
988 # elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM)
989 if (writeRandomBytes_getrandom_nonblock((void *)&entropy, sizeof(entropy))) {
990 return ENTROPY_DEBUG("getrandom", entropy);
991 }
992 # endif
993 # if ! defined(_WIN32) && defined(XML_DEV_URANDOM)
994 if (writeRandomBytes_dev_urandom((void *)&entropy, sizeof(entropy))) {
995 return ENTROPY_DEBUG("/dev/urandom", entropy);
996 }
997 # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */
998 /* .. and self-made low quality for backup: */
999
1000 /* Process ID is 0 bits entropy if attacker has local access */
1001 entropy = gather_time_entropy() ^ getpid();
1002
1003 /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */
1004 if (sizeof(unsigned long) == 4) {
1005 return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647);
1006 } else {
1007 return ENTROPY_DEBUG("fallback(8)",
1008 entropy * (unsigned long)2305843009213693951ULL);
1009 }
1010 #endif
1011 }
1012
1013 static unsigned long
1014 get_hash_secret_salt(XML_Parser parser) {
1015 if (parser->m_parentParser != NULL)
1016 return get_hash_secret_salt(parser->m_parentParser);
1017 return parser->m_hash_secret_salt;
1018 }
1019
1020 static enum XML_Error
1021 callProcessor(XML_Parser parser, const char *start, const char *end,
1022 const char **endPtr) {
1023 const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
1024
1025 if (parser->m_reparseDeferralEnabled
1026 && ! parser->m_parsingStatus.finalBuffer) {
1027 // Heuristic: don't try to parse a partial token again until the amount of
1028 // available data has increased significantly.
1029 const size_t had_before = parser->m_partialTokenBytesBefore;
1030 // ...but *do* try anyway if we're close to causing a reallocation.
1031 size_t available_buffer
1032 = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
1033 #if XML_CONTEXT_BYTES > 0
1034 available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
1035 #endif
1036 available_buffer
1037 += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
1038 // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
1039 const bool enough
1040 = (have_now >= 2 * had_before)
1041 || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
1042
1043 if (! enough) {
1044 *endPtr = start; // callers may expect this to be set
1045 return XML_ERROR_NONE;
1046 }
1047 }
1048 #if defined(XML_TESTING)
1049 g_bytesScanned += (unsigned)have_now;
1050 #endif
1051 // Run in a loop to eliminate dangerous recursion depths
1052 enum XML_Error ret;
1053 *endPtr = start;
1054 while (1) {
1055 // Use endPtr as the new start in each iteration, since it will
1056 // be set to the next start point by m_processor.
1057 ret = parser->m_processor(parser, *endPtr, end, endPtr);
1058
1059 // Make parsing status (and in particular XML_SUSPENDED) take
1060 // precedence over re-enter flag when they disagree
1061 if (parser->m_parsingStatus.parsing != XML_PARSING) {
1062 parser->m_reenter = XML_FALSE;
1063 }
1064
1065 if (! parser->m_reenter) {
1066 break;
1067 }
1068
1069 parser->m_reenter = XML_FALSE;
1070 if (ret != XML_ERROR_NONE)
1071 return ret;
1072 }
1073
1074 if (ret == XML_ERROR_NONE) {
1075 // if we consumed nothing, remember what we had on this parse attempt.
1076 if (*endPtr == start) {
1077 parser->m_partialTokenBytesBefore = have_now;
1078 } else {
1079 parser->m_partialTokenBytesBefore = 0;
1080 }
1081 }
1082 return ret;
1083 }
1084
1085 static XML_Bool /* only valid for root parser */
1086 startParsing(XML_Parser parser) {
1087 /* hash functions must be initialized before setContext() is called */
1088 if (parser->m_hash_secret_salt == 0)
1089 parser->m_hash_secret_salt = generate_hash_secret_salt(parser);
1090 if (parser->m_ns) {
1091 /* implicit context only set for root parser, since child
1092 parsers (i.e. external entity parsers) will inherit it
1093 */
1094 return setContext(parser, implicitContext);
1095 }
1096 return XML_TRUE;
1097 }
1098
1099 XML_Parser XMLCALL
1100 XML_ParserCreate_MM(const XML_Char *encodingName,
1101 const XML_Memory_Handling_Suite *memsuite,
1102 const XML_Char *nameSep) {
1103 return parserCreate(encodingName, memsuite, nameSep, NULL);
1104 }
1105
1106 static XML_Parser
1107 parserCreate(const XML_Char *encodingName,
1108 const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep,
1109 DTD *dtd) {
1110 XML_Parser parser;
1111
1112 if (memsuite) {
1113 XML_Memory_Handling_Suite *mtemp;
1114 parser = memsuite->malloc_fcn(sizeof(struct XML_ParserStruct));
1115 if (parser != NULL) {
1116 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1117 mtemp->malloc_fcn = memsuite->malloc_fcn;
1118 mtemp->realloc_fcn = memsuite->realloc_fcn;
1119 mtemp->free_fcn = memsuite->free_fcn;
1120 }
1121 } else {
1122 XML_Memory_Handling_Suite *mtemp;
1123 parser = (XML_Parser)malloc(sizeof(struct XML_ParserStruct));
1124 if (parser != NULL) {
1125 mtemp = (XML_Memory_Handling_Suite *)&(parser->m_mem);
1126 mtemp->malloc_fcn = malloc;
1127 mtemp->realloc_fcn = realloc;
1128 mtemp->free_fcn = free;
1129 }
1130 }
1131
1132 if (! parser)
1133 return parser;
1134
1135 parser->m_buffer = NULL;
1136 parser->m_bufferLim = NULL;
1137
1138 parser->m_attsSize = INIT_ATTS_SIZE;
1139 parser->m_atts
1140 = (ATTRIBUTE *)MALLOC(parser, parser->m_attsSize * sizeof(ATTRIBUTE));
1141 if (parser->m_atts == NULL) {
1142 FREE(parser, parser);
1143 return NULL;
1144 }
1145 #ifdef XML_ATTR_INFO
1146 parser->m_attInfo = (XML_AttrInfo *)MALLOC(
1147 parser, parser->m_attsSize * sizeof(XML_AttrInfo));
1148 if (parser->m_attInfo == NULL) {
1149 FREE(parser, parser->m_atts);
1150 FREE(parser, parser);
1151 return NULL;
1152 }
1153 #endif
1154 parser->m_dataBuf
1155 = (XML_Char *)MALLOC(parser, INIT_DATA_BUF_SIZE * sizeof(XML_Char));
1156 if (parser->m_dataBuf == NULL) {
1157 FREE(parser, parser->m_atts);
1158 #ifdef XML_ATTR_INFO
1159 FREE(parser, parser->m_attInfo);
1160 #endif
1161 FREE(parser, parser);
1162 return NULL;
1163 }
1164 parser->m_dataBufEnd = parser->m_dataBuf + INIT_DATA_BUF_SIZE;
1165
1166 if (dtd)
1167 parser->m_dtd = dtd;
1168 else {
1169 parser->m_dtd = dtdCreate(&parser->m_mem);
1170 if (parser->m_dtd == NULL) {
1171 FREE(parser, parser->m_dataBuf);
1172 FREE(parser, parser->m_atts);
1173 #ifdef XML_ATTR_INFO
1174 FREE(parser, parser->m_attInfo);
1175 #endif
1176 FREE(parser, parser);
1177 return NULL;
1178 }
1179 }
1180
1181 parser->m_freeBindingList = NULL;
1182 parser->m_freeTagList = NULL;
1183 parser->m_freeInternalEntities = NULL;
1184 parser->m_freeAttributeEntities = NULL;
1185 parser->m_freeValueEntities = NULL;
1186
1187 parser->m_groupSize = 0;
1188 parser->m_groupConnector = NULL;
1189
1190 parser->m_unknownEncodingHandler = NULL;
1191 parser->m_unknownEncodingHandlerData = NULL;
1192
1193 parser->m_namespaceSeparator = ASCII_EXCL;
1194 parser->m_ns = XML_FALSE;
1195 parser->m_ns_triplets = XML_FALSE;
1196
1197 parser->m_nsAtts = NULL;
1198 parser->m_nsAttsVersion = 0;
1199 parser->m_nsAttsPower = 0;
1200
1201 parser->m_protocolEncodingName = NULL;
1202
1203 poolInit(&parser->m_tempPool, &(parser->m_mem));
1204 poolInit(&parser->m_temp2Pool, &(parser->m_mem));
1205 parserInit(parser, encodingName);
1206
1207 if (encodingName && ! parser->m_protocolEncodingName) {
1208 if (dtd) {
1209 // We need to stop the upcoming call to XML_ParserFree from happily
1210 // destroying parser->m_dtd because the DTD is shared with the parent
1211 // parser and the only guard that keeps XML_ParserFree from destroying
1212 // parser->m_dtd is parser->m_isParamEntity but it will be set to
1213 // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all).
1214 parser->m_dtd = NULL;
1215 }
1216 XML_ParserFree(parser);
1217 return NULL;
1218 }
1219
1220 if (nameSep) {
1221 parser->m_ns = XML_TRUE;
1222 parser->m_internalEncoding = XmlGetInternalEncodingNS();
1223 parser->m_namespaceSeparator = *nameSep;
1224 } else {
1225 parser->m_internalEncoding = XmlGetInternalEncoding();
1226 }
1227
1228 return parser;
1229 }
1230
1231 static void
1232 parserInit(XML_Parser parser, const XML_Char *encodingName) {
1233 parser->m_processor = prologInitProcessor;
1234 XmlPrologStateInit(&parser->m_prologState);
1235 if (encodingName != NULL) {
1236 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1237 }
1238 parser->m_curBase = NULL;
1239 XmlInitEncoding(&parser->m_initEncoding, &parser->m_encoding, 0);
1240 parser->m_userData = NULL;
1241 parser->m_handlerArg = NULL;
1242 parser->m_startElementHandler = NULL;
1243 parser->m_endElementHandler = NULL;
1244 parser->m_characterDataHandler = NULL;
1245 parser->m_processingInstructionHandler = NULL;
1246 parser->m_commentHandler = NULL;
1247 parser->m_startCdataSectionHandler = NULL;
1248 parser->m_endCdataSectionHandler = NULL;
1249 parser->m_defaultHandler = NULL;
1250 parser->m_startDoctypeDeclHandler = NULL;
1251 parser->m_endDoctypeDeclHandler = NULL;
1252 parser->m_unparsedEntityDeclHandler = NULL;
1253 parser->m_notationDeclHandler = NULL;
1254 parser->m_startNamespaceDeclHandler = NULL;
1255 parser->m_endNamespaceDeclHandler = NULL;
1256 parser->m_notStandaloneHandler = NULL;
1257 parser->m_externalEntityRefHandler = NULL;
1258 parser->m_externalEntityRefHandlerArg = parser;
1259 parser->m_skippedEntityHandler = NULL;
1260 parser->m_elementDeclHandler = NULL;
1261 parser->m_attlistDeclHandler = NULL;
1262 parser->m_entityDeclHandler = NULL;
1263 parser->m_xmlDeclHandler = NULL;
1264 parser->m_bufferPtr = parser->m_buffer;
1265 parser->m_bufferEnd = parser->m_buffer;
1266 parser->m_parseEndByteIndex = 0;
1267 parser->m_parseEndPtr = NULL;
1268 parser->m_partialTokenBytesBefore = 0;
1269 parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
1270 parser->m_lastBufferRequestSize = 0;
1271 parser->m_declElementType = NULL;
1272 parser->m_declAttributeId = NULL;
1273 parser->m_declEntity = NULL;
1274 parser->m_doctypeName = NULL;
1275 parser->m_doctypeSysid = NULL;
1276 parser->m_doctypePubid = NULL;
1277 parser->m_declAttributeType = NULL;
1278 parser->m_declNotationName = NULL;
1279 parser->m_declNotationPublicId = NULL;
1280 parser->m_declAttributeIsCdata = XML_FALSE;
1281 parser->m_declAttributeIsId = XML_FALSE;
1282 memset(&parser->m_position, 0, sizeof(POSITION));
1283 parser->m_errorCode = XML_ERROR_NONE;
1284 parser->m_eventPtr = NULL;
1285 parser->m_eventEndPtr = NULL;
1286 parser->m_positionPtr = NULL;
1287 parser->m_openInternalEntities = NULL;
1288 parser->m_openAttributeEntities = NULL;
1289 parser->m_openValueEntities = NULL;
1290 parser->m_defaultExpandInternalEntities = XML_TRUE;
1291 parser->m_tagLevel = 0;
1292 parser->m_tagStack = NULL;
1293 parser->m_inheritedBindings = NULL;
1294 parser->m_nSpecifiedAtts = 0;
1295 parser->m_unknownEncodingMem = NULL;
1296 parser->m_unknownEncodingRelease = NULL;
1297 parser->m_unknownEncodingData = NULL;
1298 parser->m_parentParser = NULL;
1299 parser->m_parsingStatus.parsing = XML_INITIALIZED;
1300 // Reentry can only be triggered inside m_processor calls
1301 parser->m_reenter = XML_FALSE;
1302 #ifdef XML_DTD
1303 parser->m_isParamEntity = XML_FALSE;
1304 parser->m_useForeignDTD = XML_FALSE;
1305 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
1306 #endif
1307 parser->m_hash_secret_salt = 0;
1308
1309 #if XML_GE == 1
1310 memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
1311 parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
1312 parser->m_accounting.maximumAmplificationFactor
1313 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT;
1314 parser->m_accounting.activationThresholdBytes
1315 = EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT;
1316
1317 memset(&parser->m_entity_stats, 0, sizeof(ENTITY_STATS));
1318 parser->m_entity_stats.debugLevel = getDebugLevel("EXPAT_ENTITY_DEBUG", 0u);
1319 #endif
1320 }
1321
1322 /* moves list of bindings to m_freeBindingList */
1323 static void FASTCALL
1324 moveToFreeBindingList(XML_Parser parser, BINDING *bindings) {
1325 while (bindings) {
1326 BINDING *b = bindings;
1327 bindings = bindings->nextTagBinding;
1328 b->nextTagBinding = parser->m_freeBindingList;
1329 parser->m_freeBindingList = b;
1330 }
1331 }
1332
1333 XML_Bool XMLCALL
1334 XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) {
1335 TAG *tStk;
1336 OPEN_INTERNAL_ENTITY *openEntityList;
1337
1338 if (parser == NULL)
1339 return XML_FALSE;
1340
1341 if (parser->m_parentParser)
1342 return XML_FALSE;
1343 /* move m_tagStack to m_freeTagList */
1344 tStk = parser->m_tagStack;
1345 while (tStk) {
1346 TAG *tag = tStk;
1347 tStk = tStk->parent;
1348 tag->parent = parser->m_freeTagList;
1349 moveToFreeBindingList(parser, tag->bindings);
1350 tag->bindings = NULL;
1351 parser->m_freeTagList = tag;
1352 }
1353 /* move m_openInternalEntities to m_freeInternalEntities */
1354 openEntityList = parser->m_openInternalEntities;
1355 while (openEntityList) {
1356 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1357 openEntityList = openEntity->next;
1358 openEntity->next = parser->m_freeInternalEntities;
1359 parser->m_freeInternalEntities = openEntity;
1360 }
1361 /* move m_openAttributeEntities to m_freeAttributeEntities (i.e. same task but
1362 * for attributes) */
1363 openEntityList = parser->m_openAttributeEntities;
1364 while (openEntityList) {
1365 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1366 openEntityList = openEntity->next;
1367 openEntity->next = parser->m_freeAttributeEntities;
1368 parser->m_freeAttributeEntities = openEntity;
1369 }
1370 /* move m_openValueEntities to m_freeValueEntities (i.e. same task but
1371 * for value entities) */
1372 openEntityList = parser->m_openValueEntities;
1373 while (openEntityList) {
1374 OPEN_INTERNAL_ENTITY *openEntity = openEntityList;
1375 openEntityList = openEntity->next;
1376 openEntity->next = parser->m_freeValueEntities;
1377 parser->m_freeValueEntities = openEntity;
1378 }
1379 moveToFreeBindingList(parser, parser->m_inheritedBindings);
1380 FREE(parser, parser->m_unknownEncodingMem);
1381 if (parser->m_unknownEncodingRelease)
1382 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1383 poolClear(&parser->m_tempPool);
1384 poolClear(&parser->m_temp2Pool);
1385 FREE(parser, (void *)parser->m_protocolEncodingName);
1386 parser->m_protocolEncodingName = NULL;
1387 parserInit(parser, encodingName);
1388 dtdReset(parser->m_dtd, &parser->m_mem);
1389 return XML_TRUE;
1390 }
1391
1392 static XML_Bool
1393 parserBusy(XML_Parser parser) {
1394 switch (parser->m_parsingStatus.parsing) {
1395 case XML_PARSING:
1396 case XML_SUSPENDED:
1397 return XML_TRUE;
1398 case XML_INITIALIZED:
1399 case XML_FINISHED:
1400 default:
1401 return XML_FALSE;
1402 }
1403 }
1404
1405 enum XML_Status XMLCALL
1406 XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) {
1407 if (parser == NULL)
1408 return XML_STATUS_ERROR;
1409 /* Block after XML_Parse()/XML_ParseBuffer() has been called.
1410 XXX There's no way for the caller to determine which of the
1411 XXX possible error cases caused the XML_STATUS_ERROR return.
1412 */
1413 if (parserBusy(parser))
1414 return XML_STATUS_ERROR;
1415
1416 /* Get rid of any previous encoding name */
1417 FREE(parser, (void *)parser->m_protocolEncodingName);
1418
1419 if (encodingName == NULL)
1420 /* No new encoding name */
1421 parser->m_protocolEncodingName = NULL;
1422 else {
1423 /* Copy the new encoding name into allocated memory */
1424 parser->m_protocolEncodingName = copyString(encodingName, &(parser->m_mem));
1425 if (! parser->m_protocolEncodingName)
1426 return XML_STATUS_ERROR;
1427 }
1428 return XML_STATUS_OK;
1429 }
1430
1431 XML_Parser XMLCALL
1432 XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
1433 const XML_Char *encodingName) {
1434 XML_Parser parser = oldParser;
1435 DTD *newDtd = NULL;
1436 DTD *oldDtd;
1437 XML_StartElementHandler oldStartElementHandler;
1438 XML_EndElementHandler oldEndElementHandler;
1439 XML_CharacterDataHandler oldCharacterDataHandler;
1440 XML_ProcessingInstructionHandler oldProcessingInstructionHandler;
1441 XML_CommentHandler oldCommentHandler;
1442 XML_StartCdataSectionHandler oldStartCdataSectionHandler;
1443 XML_EndCdataSectionHandler oldEndCdataSectionHandler;
1444 XML_DefaultHandler oldDefaultHandler;
1445 XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler;
1446 XML_NotationDeclHandler oldNotationDeclHandler;
1447 XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler;
1448 XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler;
1449 XML_NotStandaloneHandler oldNotStandaloneHandler;
1450 XML_ExternalEntityRefHandler oldExternalEntityRefHandler;
1451 XML_SkippedEntityHandler oldSkippedEntityHandler;
1452 XML_UnknownEncodingHandler oldUnknownEncodingHandler;
1453 XML_ElementDeclHandler oldElementDeclHandler;
1454 XML_AttlistDeclHandler oldAttlistDeclHandler;
1455 XML_EntityDeclHandler oldEntityDeclHandler;
1456 XML_XmlDeclHandler oldXmlDeclHandler;
1457 ELEMENT_TYPE *oldDeclElementType;
1458
1459 void *oldUserData;
1460 void *oldHandlerArg;
1461 XML_Bool oldDefaultExpandInternalEntities;
1462 XML_Parser oldExternalEntityRefHandlerArg;
1463 #ifdef XML_DTD
1464 enum XML_ParamEntityParsing oldParamEntityParsing;
1465 int oldInEntityValue;
1466 #endif
1467 XML_Bool oldns_triplets;
1468 /* Note that the new parser shares the same hash secret as the old
1469 parser, so that dtdCopy and copyEntityTable can lookup values
1470 from hash tables associated with either parser without us having
1471 to worry which hash secrets each table has.
1472 */
1473 unsigned long oldhash_secret_salt;
1474 XML_Bool oldReparseDeferralEnabled;
1475
1476 /* Validate the oldParser parameter before we pull everything out of it */
1477 if (oldParser == NULL)
1478 return NULL;
1479
1480 /* Stash the original parser contents on the stack */
1481 oldDtd = parser->m_dtd;
1482 oldStartElementHandler = parser->m_startElementHandler;
1483 oldEndElementHandler = parser->m_endElementHandler;
1484 oldCharacterDataHandler = parser->m_characterDataHandler;
1485 oldProcessingInstructionHandler = parser->m_processingInstructionHandler;
1486 oldCommentHandler = parser->m_commentHandler;
1487 oldStartCdataSectionHandler = parser->m_startCdataSectionHandler;
1488 oldEndCdataSectionHandler = parser->m_endCdataSectionHandler;
1489 oldDefaultHandler = parser->m_defaultHandler;
1490 oldUnparsedEntityDeclHandler = parser->m_unparsedEntityDeclHandler;
1491 oldNotationDeclHandler = parser->m_notationDeclHandler;
1492 oldStartNamespaceDeclHandler = parser->m_startNamespaceDeclHandler;
1493 oldEndNamespaceDeclHandler = parser->m_endNamespaceDeclHandler;
1494 oldNotStandaloneHandler = parser->m_notStandaloneHandler;
1495 oldExternalEntityRefHandler = parser->m_externalEntityRefHandler;
1496 oldSkippedEntityHandler = parser->m_skippedEntityHandler;
1497 oldUnknownEncodingHandler = parser->m_unknownEncodingHandler;
1498 oldElementDeclHandler = parser->m_elementDeclHandler;
1499 oldAttlistDeclHandler = parser->m_attlistDeclHandler;
1500 oldEntityDeclHandler = parser->m_entityDeclHandler;
1501 oldXmlDeclHandler = parser->m_xmlDeclHandler;
1502 oldDeclElementType = parser->m_declElementType;
1503
1504 oldUserData = parser->m_userData;
1505 oldHandlerArg = parser->m_handlerArg;
1506 oldDefaultExpandInternalEntities = parser->m_defaultExpandInternalEntities;
1507 oldExternalEntityRefHandlerArg = parser->m_externalEntityRefHandlerArg;
1508 #ifdef XML_DTD
1509 oldParamEntityParsing = parser->m_paramEntityParsing;
1510 oldInEntityValue = parser->m_prologState.inEntityValue;
1511 #endif
1512 oldns_triplets = parser->m_ns_triplets;
1513 /* Note that the new parser shares the same hash secret as the old
1514 parser, so that dtdCopy and copyEntityTable can lookup values
1515 from hash tables associated with either parser without us having
1516 to worry which hash secrets each table has.
1517 */
1518 oldhash_secret_salt = parser->m_hash_secret_salt;
1519 oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
1520
1521 #ifdef XML_DTD
1522 if (! context)
1523 newDtd = oldDtd;
1524 #endif /* XML_DTD */
1525
1526 /* Note that the magical uses of the pre-processor to make field
1527 access look more like C++ require that `parser' be overwritten
1528 here. This makes this function more painful to follow than it
1529 would be otherwise.
1530 */
1531 if (parser->m_ns) {
1532 XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
1533 parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
1534 } else {
1535 parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
1536 }
1537
1538 if (! parser)
1539 return NULL;
1540
1541 parser->m_startElementHandler = oldStartElementHandler;
1542 parser->m_endElementHandler = oldEndElementHandler;
1543 parser->m_characterDataHandler = oldCharacterDataHandler;
1544 parser->m_processingInstructionHandler = oldProcessingInstructionHandler;
1545 parser->m_commentHandler = oldCommentHandler;
1546 parser->m_startCdataSectionHandler = oldStartCdataSectionHandler;
1547 parser->m_endCdataSectionHandler = oldEndCdataSectionHandler;
1548 parser->m_defaultHandler = oldDefaultHandler;
1549 parser->m_unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler;
1550 parser->m_notationDeclHandler = oldNotationDeclHandler;
1551 parser->m_startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
1552 parser->m_endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
1553 parser->m_notStandaloneHandler = oldNotStandaloneHandler;
1554 parser->m_externalEntityRefHandler = oldExternalEntityRefHandler;
1555 parser->m_skippedEntityHandler = oldSkippedEntityHandler;
1556 parser->m_unknownEncodingHandler = oldUnknownEncodingHandler;
1557 parser->m_elementDeclHandler = oldElementDeclHandler;
1558 parser->m_attlistDeclHandler = oldAttlistDeclHandler;
1559 parser->m_entityDeclHandler = oldEntityDeclHandler;
1560 parser->m_xmlDeclHandler = oldXmlDeclHandler;
1561 parser->m_declElementType = oldDeclElementType;
1562 parser->m_userData = oldUserData;
1563 if (oldUserData == oldHandlerArg)
1564 parser->m_handlerArg = parser->m_userData;
1565 else
1566 parser->m_handlerArg = parser;
1567 if (oldExternalEntityRefHandlerArg != oldParser)
1568 parser->m_externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
1569 parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1570 parser->m_ns_triplets = oldns_triplets;
1571 parser->m_hash_secret_salt = oldhash_secret_salt;
1572 parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
1573 parser->m_parentParser = oldParser;
1574 #ifdef XML_DTD
1575 parser->m_paramEntityParsing = oldParamEntityParsing;
1576 parser->m_prologState.inEntityValue = oldInEntityValue;
1577 if (context) {
1578 #endif /* XML_DTD */
1579 if (! dtdCopy(oldParser, parser->m_dtd, oldDtd, &parser->m_mem)
1580 || ! setContext(parser, context)) {
1581 XML_ParserFree(parser);
1582 return NULL;
1583 }
1584 parser->m_processor = externalEntityInitProcessor;
1585 #ifdef XML_DTD
1586 } else {
1587 /* The DTD instance referenced by parser->m_dtd is shared between the
1588 document's root parser and external PE parsers, therefore one does not
1589 need to call setContext. In addition, one also *must* not call
1590 setContext, because this would overwrite existing prefix->binding
1591 pointers in parser->m_dtd with ones that get destroyed with the external
1592 PE parser. This would leave those prefixes with dangling pointers.
1593 */
1594 parser->m_isParamEntity = XML_TRUE;
1595 XmlPrologStateInitExternalEntity(&parser->m_prologState);
1596 parser->m_processor = externalParEntInitProcessor;
1597 }
1598 #endif /* XML_DTD */
1599 return parser;
1600 }
1601
1602 static void FASTCALL
1603 destroyBindings(BINDING *bindings, XML_Parser parser) {
1604 for (;;) {
1605 BINDING *b = bindings;
1606 if (! b)
1607 break;
1608 bindings = b->nextTagBinding;
1609 FREE(parser, b->uri);
1610 FREE(parser, b);
1611 }
1612 }
1613
1614 void XMLCALL
1615 XML_ParserFree(XML_Parser parser) {
1616 TAG *tagList;
1617 OPEN_INTERNAL_ENTITY *entityList;
1618 if (parser == NULL)
1619 return;
1620 /* free m_tagStack and m_freeTagList */
1621 tagList = parser->m_tagStack;
1622 for (;;) {
1623 TAG *p;
1624 if (tagList == NULL) {
1625 if (parser->m_freeTagList == NULL)
1626 break;
1627 tagList = parser->m_freeTagList;
1628 parser->m_freeTagList = NULL;
1629 }
1630 p = tagList;
1631 tagList = tagList->parent;
1632 FREE(parser, p->buf);
1633 destroyBindings(p->bindings, parser);
1634 FREE(parser, p);
1635 }
1636 /* free m_openInternalEntities and m_freeInternalEntities */
1637 entityList = parser->m_openInternalEntities;
1638 for (;;) {
1639 OPEN_INTERNAL_ENTITY *openEntity;
1640 if (entityList == NULL) {
1641 if (parser->m_freeInternalEntities == NULL)
1642 break;
1643 entityList = parser->m_freeInternalEntities;
1644 parser->m_freeInternalEntities = NULL;
1645 }
1646 openEntity = entityList;
1647 entityList = entityList->next;
1648 FREE(parser, openEntity);
1649 }
1650 /* free m_openAttributeEntities and m_freeAttributeEntities */
1651 entityList = parser->m_openAttributeEntities;
1652 for (;;) {
1653 OPEN_INTERNAL_ENTITY *openEntity;
1654 if (entityList == NULL) {
1655 if (parser->m_freeAttributeEntities == NULL)
1656 break;
1657 entityList = parser->m_freeAttributeEntities;
1658 parser->m_freeAttributeEntities = NULL;
1659 }
1660 openEntity = entityList;
1661 entityList = entityList->next;
1662 FREE(parser, openEntity);
1663 }
1664 /* free m_openValueEntities and m_freeValueEntities */
1665 entityList = parser->m_openValueEntities;
1666 for (;;) {
1667 OPEN_INTERNAL_ENTITY *openEntity;
1668 if (entityList == NULL) {
1669 if (parser->m_freeValueEntities == NULL)
1670 break;
1671 entityList = parser->m_freeValueEntities;
1672 parser->m_freeValueEntities = NULL;
1673 }
1674 openEntity = entityList;
1675 entityList = entityList->next;
1676 FREE(parser, openEntity);
1677 }
1678 destroyBindings(parser->m_freeBindingList, parser);
1679 destroyBindings(parser->m_inheritedBindings, parser);
1680 poolDestroy(&parser->m_tempPool);
1681 poolDestroy(&parser->m_temp2Pool);
1682 FREE(parser, (void *)parser->m_protocolEncodingName);
1683 #ifdef XML_DTD
1684 /* external parameter entity parsers share the DTD structure
1685 parser->m_dtd with the root parser, so we must not destroy it
1686 */
1687 if (! parser->m_isParamEntity && parser->m_dtd)
1688 #else
1689 if (parser->m_dtd)
1690 #endif /* XML_DTD */
1691 dtdDestroy(parser->m_dtd, (XML_Bool)! parser->m_parentParser,
1692 &parser->m_mem);
1693 FREE(parser, (void *)parser->m_atts);
1694 #ifdef XML_ATTR_INFO
1695 FREE(parser, (void *)parser->m_attInfo);
1696 #endif
1697 FREE(parser, parser->m_groupConnector);
1698 FREE(parser, parser->m_buffer);
1699 FREE(parser, parser->m_dataBuf);
1700 FREE(parser, parser->m_nsAtts);
1701 FREE(parser, parser->m_unknownEncodingMem);
1702 if (parser->m_unknownEncodingRelease)
1703 parser->m_unknownEncodingRelease(parser->m_unknownEncodingData);
1704 FREE(parser, parser);
1705 }
1706
1707 void XMLCALL
1708 XML_UseParserAsHandlerArg(XML_Parser parser) {
1709 if (parser != NULL)
1710 parser->m_handlerArg = parser;
1711 }
1712
1713 enum XML_Error XMLCALL
1714 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) {
1715 if (parser == NULL)
1716 return XML_ERROR_INVALID_ARGUMENT;
1717 #ifdef XML_DTD
1718 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1719 if (parserBusy(parser))
1720 return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING;
1721 parser->m_useForeignDTD = useDTD;
1722 return XML_ERROR_NONE;
1723 #else
1724 UNUSED_P(useDTD);
1725 return XML_ERROR_FEATURE_REQUIRES_XML_DTD;
1726 #endif
1727 }
1728
1729 void XMLCALL
1730 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) {
1731 if (parser == NULL)
1732 return;
1733 /* block after XML_Parse()/XML_ParseBuffer() has been called */
1734 if (parserBusy(parser))
1735 return;
1736 parser->m_ns_triplets = do_nst ? XML_TRUE : XML_FALSE;
1737 }
1738
1739 void XMLCALL
1740 XML_SetUserData(XML_Parser parser, void *p) {
1741 if (parser == NULL)
1742 return;
1743 if (parser->m_handlerArg == parser->m_userData)
1744 parser->m_handlerArg = parser->m_userData = p;
1745 else
1746 parser->m_userData = p;
1747 }
1748
1749 enum XML_Status XMLCALL
1750 XML_SetBase(XML_Parser parser, const XML_Char *p) {
1751 if (parser == NULL)
1752 return XML_STATUS_ERROR;
1753 if (p) {
1754 p = poolCopyString(&parser->m_dtd->pool, p);
1755 if (! p)
1756 return XML_STATUS_ERROR;
1757 parser->m_curBase = p;
1758 } else
1759 parser->m_curBase = NULL;
1760 return XML_STATUS_OK;
1761 }
1762
1763 const XML_Char *XMLCALL
1764 XML_GetBase(XML_Parser parser) {
1765 if (parser == NULL)
1766 return NULL;
1767 return parser->m_curBase;
1768 }
1769
1770 int XMLCALL
1771 XML_GetSpecifiedAttributeCount(XML_Parser parser) {
1772 if (parser == NULL)
1773 return -1;
1774 return parser->m_nSpecifiedAtts;
1775 }
1776
1777 int XMLCALL
1778 XML_GetIdAttributeIndex(XML_Parser parser) {
1779 if (parser == NULL)
1780 return -1;
1781 return parser->m_idAttIndex;
1782 }
1783
#ifdef XML_ATTR_INFO
/* Return the attribute info array (m_attInfo), or NULL for a NULL parser.
   Only compiled when XML_ATTR_INFO is defined. */
const XML_AttrInfo *XMLCALL
XML_GetAttributeInfo(XML_Parser parser) {
  if (parser != NULL)
    return parser->m_attInfo;
  return NULL;
}
#endif
1792
1793 void XMLCALL
1794 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start,
1795 XML_EndElementHandler end) {
1796 if (parser == NULL)
1797 return;
1798 parser->m_startElementHandler = start;
1799 parser->m_endElementHandler = end;
1800 }
1801
1802 void XMLCALL
1803 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) {
1804 if (parser != NULL)
1805 parser->m_startElementHandler = start;
1806 }
1807
1808 void XMLCALL
1809 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) {
1810 if (parser != NULL)
1811 parser->m_endElementHandler = end;
1812 }
1813
1814 void XMLCALL
1815 XML_SetCharacterDataHandler(XML_Parser parser,
1816 XML_CharacterDataHandler handler) {
1817 if (parser != NULL)
1818 parser->m_characterDataHandler = handler;
1819 }
1820
1821 void XMLCALL
1822 XML_SetProcessingInstructionHandler(XML_Parser parser,
1823 XML_ProcessingInstructionHandler handler) {
1824 if (parser != NULL)
1825 parser->m_processingInstructionHandler = handler;
1826 }
1827
1828 void XMLCALL
1829 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) {
1830 if (parser != NULL)
1831 parser->m_commentHandler = handler;
1832 }
1833
1834 void XMLCALL
1835 XML_SetCdataSectionHandler(XML_Parser parser,
1836 XML_StartCdataSectionHandler start,
1837 XML_EndCdataSectionHandler end) {
1838 if (parser == NULL)
1839 return;
1840 parser->m_startCdataSectionHandler = start;
1841 parser->m_endCdataSectionHandler = end;
1842 }
1843
1844 void XMLCALL
1845 XML_SetStartCdataSectionHandler(XML_Parser parser,
1846 XML_StartCdataSectionHandler start) {
1847 if (parser != NULL)
1848 parser->m_startCdataSectionHandler = start;
1849 }
1850
1851 void XMLCALL
1852 XML_SetEndCdataSectionHandler(XML_Parser parser,
1853 XML_EndCdataSectionHandler end) {
1854 if (parser != NULL)
1855 parser->m_endCdataSectionHandler = end;
1856 }
1857
1858 void XMLCALL
1859 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) {
1860 if (parser == NULL)
1861 return;
1862 parser->m_defaultHandler = handler;
1863 parser->m_defaultExpandInternalEntities = XML_FALSE;
1864 }
1865
1866 void XMLCALL
1867 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) {
1868 if (parser == NULL)
1869 return;
1870 parser->m_defaultHandler = handler;
1871 parser->m_defaultExpandInternalEntities = XML_TRUE;
1872 }
1873
1874 void XMLCALL
1875 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start,
1876 XML_EndDoctypeDeclHandler end) {
1877 if (parser == NULL)
1878 return;
1879 parser->m_startDoctypeDeclHandler = start;
1880 parser->m_endDoctypeDeclHandler = end;
1881 }
1882
1883 void XMLCALL
1884 XML_SetStartDoctypeDeclHandler(XML_Parser parser,
1885 XML_StartDoctypeDeclHandler start) {
1886 if (parser != NULL)
1887 parser->m_startDoctypeDeclHandler = start;
1888 }
1889
1890 void XMLCALL
1891 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) {
1892 if (parser != NULL)
1893 parser->m_endDoctypeDeclHandler = end;
1894 }
1895
1896 void XMLCALL
1897 XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
1898 XML_UnparsedEntityDeclHandler handler) {
1899 if (parser != NULL)
1900 parser->m_unparsedEntityDeclHandler = handler;
1901 }
1902
1903 void XMLCALL
1904 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) {
1905 if (parser != NULL)
1906 parser->m_notationDeclHandler = handler;
1907 }
1908
1909 void XMLCALL
1910 XML_SetNamespaceDeclHandler(XML_Parser parser,
1911 XML_StartNamespaceDeclHandler start,
1912 XML_EndNamespaceDeclHandler end) {
1913 if (parser == NULL)
1914 return;
1915 parser->m_startNamespaceDeclHandler = start;
1916 parser->m_endNamespaceDeclHandler = end;
1917 }
1918
1919 void XMLCALL
1920 XML_SetStartNamespaceDeclHandler(XML_Parser parser,
1921 XML_StartNamespaceDeclHandler start) {
1922 if (parser != NULL)
1923 parser->m_startNamespaceDeclHandler = start;
1924 }
1925
1926 void XMLCALL
1927 XML_SetEndNamespaceDeclHandler(XML_Parser parser,
1928 XML_EndNamespaceDeclHandler end) {
1929 if (parser != NULL)
1930 parser->m_endNamespaceDeclHandler = end;
1931 }
1932
1933 void XMLCALL
1934 XML_SetNotStandaloneHandler(XML_Parser parser,
1935 XML_NotStandaloneHandler handler) {
1936 if (parser != NULL)
1937 parser->m_notStandaloneHandler = handler;
1938 }
1939
1940 void XMLCALL
1941 XML_SetExternalEntityRefHandler(XML_Parser parser,
1942 XML_ExternalEntityRefHandler handler) {
1943 if (parser != NULL)
1944 parser->m_externalEntityRefHandler = handler;
1945 }
1946
1947 void XMLCALL
1948 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) {
1949 if (parser == NULL)
1950 return;
1951 if (arg)
1952 parser->m_externalEntityRefHandlerArg = (XML_Parser)arg;
1953 else
1954 parser->m_externalEntityRefHandlerArg = parser;
1955 }
1956
1957 void XMLCALL
1958 XML_SetSkippedEntityHandler(XML_Parser parser,
1959 XML_SkippedEntityHandler handler) {
1960 if (parser != NULL)
1961 parser->m_skippedEntityHandler = handler;
1962 }
1963
1964 void XMLCALL
1965 XML_SetUnknownEncodingHandler(XML_Parser parser,
1966 XML_UnknownEncodingHandler handler, void *data) {
1967 if (parser == NULL)
1968 return;
1969 parser->m_unknownEncodingHandler = handler;
1970 parser->m_unknownEncodingHandlerData = data;
1971 }
1972
1973 void XMLCALL
1974 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) {
1975 if (parser != NULL)
1976 parser->m_elementDeclHandler = eldecl;
1977 }
1978
1979 void XMLCALL
1980 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) {
1981 if (parser != NULL)
1982 parser->m_attlistDeclHandler = attdecl;
1983 }
1984
1985 void XMLCALL
1986 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) {
1987 if (parser != NULL)
1988 parser->m_entityDeclHandler = handler;
1989 }
1990
1991 void XMLCALL
1992 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) {
1993 if (parser != NULL)
1994 parser->m_xmlDeclHandler = handler;
1995 }
1996
1997 int XMLCALL
1998 XML_SetParamEntityParsing(XML_Parser parser,
1999 enum XML_ParamEntityParsing peParsing) {
2000 if (parser == NULL)
2001 return 0;
2002 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2003 if (parserBusy(parser))
2004 return 0;
2005 #ifdef XML_DTD
2006 parser->m_paramEntityParsing = peParsing;
2007 return 1;
2008 #else
2009 return peParsing == XML_PARAM_ENTITY_PARSING_NEVER;
2010 #endif
2011 }
2012
2013 int XMLCALL
2014 XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) {
2015 if (parser == NULL)
2016 return 0;
2017 if (parser->m_parentParser)
2018 return XML_SetHashSalt(parser->m_parentParser, hash_salt);
2019 /* block after XML_Parse()/XML_ParseBuffer() has been called */
2020 if (parserBusy(parser))
2021 return 0;
2022 parser->m_hash_secret_salt = hash_salt;
2023 return 1;
2024 }
2025
2026 enum XML_Status XMLCALL
2027 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
2028 if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) {
2029 if (parser != NULL)
2030 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2031 return XML_STATUS_ERROR;
2032 }
2033 switch (parser->m_parsingStatus.parsing) {
2034 case XML_SUSPENDED:
2035 parser->m_errorCode = XML_ERROR_SUSPENDED;
2036 return XML_STATUS_ERROR;
2037 case XML_FINISHED:
2038 parser->m_errorCode = XML_ERROR_FINISHED;
2039 return XML_STATUS_ERROR;
2040 case XML_INITIALIZED:
2041 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2042 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2043 return XML_STATUS_ERROR;
2044 }
2045 /* fall through */
2046 default:
2047 parser->m_parsingStatus.parsing = XML_PARSING;
2048 }
2049
2050 #if XML_CONTEXT_BYTES == 0
2051 if (parser->m_bufferPtr == parser->m_bufferEnd) {
2052 const char *end;
2053 int nLeftOver;
2054 enum XML_Status result;
2055 /* Detect overflow (a+b > MAX <==> b > MAX-a) */
2056 if ((XML_Size)len > ((XML_Size)-1) / 2 - parser->m_parseEndByteIndex) {
2057 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2058 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2059 parser->m_processor = errorProcessor;
2060 return XML_STATUS_ERROR;
2061 }
2062 // though this isn't a buffer request, we assume that `len` is the app's
2063 // preferred buffer fill size, and therefore save it here.
2064 parser->m_lastBufferRequestSize = len;
2065 parser->m_parseEndByteIndex += len;
2066 parser->m_positionPtr = s;
2067 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2068
2069 parser->m_errorCode
2070 = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
2071
2072 if (parser->m_errorCode != XML_ERROR_NONE) {
2073 parser->m_eventEndPtr = parser->m_eventPtr;
2074 parser->m_processor = errorProcessor;
2075 return XML_STATUS_ERROR;
2076 } else {
2077 switch (parser->m_parsingStatus.parsing) {
2078 case XML_SUSPENDED:
2079 result = XML_STATUS_SUSPENDED;
2080 break;
2081 case XML_INITIALIZED:
2082 case XML_PARSING:
2083 if (isFinal) {
2084 parser->m_parsingStatus.parsing = XML_FINISHED;
2085 return XML_STATUS_OK;
2086 }
2087 /* fall through */
2088 default:
2089 result = XML_STATUS_OK;
2090 }
2091 }
2092
2093 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, end,
2094 &parser->m_position);
2095 nLeftOver = s + len - end;
2096 if (nLeftOver) {
2097 // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
2098 // (and XML_ERROR_FINISHED) from XML_GetBuffer.
2099 const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
2100 parser->m_parsingStatus.parsing = XML_PARSING;
2101 void *const temp = XML_GetBuffer(parser, nLeftOver);
2102 parser->m_parsingStatus.parsing = originalStatus;
2103 // GetBuffer may have overwritten this, but we want to remember what the
2104 // app requested, not how many bytes were left over after parsing.
2105 parser->m_lastBufferRequestSize = len;
2106 if (temp == NULL) {
2107 // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
2108 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2109 parser->m_processor = errorProcessor;
2110 return XML_STATUS_ERROR;
2111 }
2112 // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
2113 // don't have any data to preserve, and can copy straight into the start
2114 // of the buffer rather than the GetBuffer return pointer (which may be
2115 // pointing further into the allocated buffer).
2116 memcpy(parser->m_buffer, end, nLeftOver);
2117 }
2118 parser->m_bufferPtr = parser->m_buffer;
2119 parser->m_bufferEnd = parser->m_buffer + nLeftOver;
2120 parser->m_positionPtr = parser->m_bufferPtr;
2121 parser->m_parseEndPtr = parser->m_bufferEnd;
2122 parser->m_eventPtr = parser->m_bufferPtr;
2123 parser->m_eventEndPtr = parser->m_bufferPtr;
2124 return result;
2125 }
2126 #endif /* XML_CONTEXT_BYTES == 0 */
2127 void *buff = XML_GetBuffer(parser, len);
2128 if (buff == NULL)
2129 return XML_STATUS_ERROR;
2130 if (len > 0) {
2131 assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
2132 memcpy(buff, s, len);
2133 }
2134 return XML_ParseBuffer(parser, len, isFinal);
2135 }
2136
2137 enum XML_Status XMLCALL
2138 XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
2139 const char *start;
2140 enum XML_Status result = XML_STATUS_OK;
2141
2142 if (parser == NULL)
2143 return XML_STATUS_ERROR;
2144
2145 if (len < 0) {
2146 parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT;
2147 return XML_STATUS_ERROR;
2148 }
2149
2150 switch (parser->m_parsingStatus.parsing) {
2151 case XML_SUSPENDED:
2152 parser->m_errorCode = XML_ERROR_SUSPENDED;
2153 return XML_STATUS_ERROR;
2154 case XML_FINISHED:
2155 parser->m_errorCode = XML_ERROR_FINISHED;
2156 return XML_STATUS_ERROR;
2157 case XML_INITIALIZED:
2158 /* Has someone called XML_GetBuffer successfully before? */
2159 if (! parser->m_bufferPtr) {
2160 parser->m_errorCode = XML_ERROR_NO_BUFFER;
2161 return XML_STATUS_ERROR;
2162 }
2163
2164 if (parser->m_parentParser == NULL && ! startParsing(parser)) {
2165 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2166 return XML_STATUS_ERROR;
2167 }
2168 /* fall through */
2169 default:
2170 parser->m_parsingStatus.parsing = XML_PARSING;
2171 }
2172
2173 start = parser->m_bufferPtr;
2174 parser->m_positionPtr = start;
2175 parser->m_bufferEnd += len;
2176 parser->m_parseEndPtr = parser->m_bufferEnd;
2177 parser->m_parseEndByteIndex += len;
2178 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
2179
2180 parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
2181 &parser->m_bufferPtr);
2182
2183 if (parser->m_errorCode != XML_ERROR_NONE) {
2184 parser->m_eventEndPtr = parser->m_eventPtr;
2185 parser->m_processor = errorProcessor;
2186 return XML_STATUS_ERROR;
2187 } else {
2188 switch (parser->m_parsingStatus.parsing) {
2189 case XML_SUSPENDED:
2190 result = XML_STATUS_SUSPENDED;
2191 break;
2192 case XML_INITIALIZED:
2193 case XML_PARSING:
2194 if (isFinal) {
2195 parser->m_parsingStatus.parsing = XML_FINISHED;
2196 return result;
2197 }
2198 default:; /* should not happen */
2199 }
2200 }
2201
2202 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2203 parser->m_bufferPtr, &parser->m_position);
2204 parser->m_positionPtr = parser->m_bufferPtr;
2205 return result;
2206 }
2207
2208 void *XMLCALL
2209 XML_GetBuffer(XML_Parser parser, int len) {
2210 if (parser == NULL)
2211 return NULL;
2212 if (len < 0) {
2213 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2214 return NULL;
2215 }
2216 switch (parser->m_parsingStatus.parsing) {
2217 case XML_SUSPENDED:
2218 parser->m_errorCode = XML_ERROR_SUSPENDED;
2219 return NULL;
2220 case XML_FINISHED:
2221 parser->m_errorCode = XML_ERROR_FINISHED;
2222 return NULL;
2223 default:;
2224 }
2225
2226 // whether or not the request succeeds, `len` seems to be the app's preferred
2227 // buffer fill size; remember it.
2228 parser->m_lastBufferRequestSize = len;
2229 if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
2230 || parser->m_buffer == NULL) {
2231 #if XML_CONTEXT_BYTES > 0
2232 int keep;
2233 #endif /* XML_CONTEXT_BYTES > 0 */
2234 /* Do not invoke signed arithmetic overflow: */
2235 int neededSize = (int)((unsigned)len
2236 + (unsigned)EXPAT_SAFE_PTR_DIFF(
2237 parser->m_bufferEnd, parser->m_bufferPtr));
2238 if (neededSize < 0) {
2239 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2240 return NULL;
2241 }
2242 #if XML_CONTEXT_BYTES > 0
2243 keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
2244 if (keep > XML_CONTEXT_BYTES)
2245 keep = XML_CONTEXT_BYTES;
2246 /* Detect and prevent integer overflow */
2247 if (keep > INT_MAX - neededSize) {
2248 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2249 return NULL;
2250 }
2251 neededSize += keep;
2252 #endif /* XML_CONTEXT_BYTES > 0 */
2253 if (parser->m_buffer && parser->m_bufferPtr
2254 && neededSize
2255 <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
2256 #if XML_CONTEXT_BYTES > 0
2257 if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
2258 int offset
2259 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
2260 - keep;
2261 /* The buffer pointers cannot be NULL here; we have at least some bytes
2262 * in the buffer */
2263 memmove(parser->m_buffer, &parser->m_buffer[offset],
2264 parser->m_bufferEnd - parser->m_bufferPtr + keep);
2265 parser->m_bufferEnd -= offset;
2266 parser->m_bufferPtr -= offset;
2267 }
2268 #else
2269 memmove(parser->m_buffer, parser->m_bufferPtr,
2270 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2271 parser->m_bufferEnd
2272 = parser->m_buffer
2273 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2274 parser->m_bufferPtr = parser->m_buffer;
2275 #endif /* XML_CONTEXT_BYTES > 0 */
2276 } else {
2277 char *newBuf;
2278 int bufferSize
2279 = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
2280 if (bufferSize == 0)
2281 bufferSize = INIT_BUFFER_SIZE;
2282 do {
2283 /* Do not invoke signed arithmetic overflow: */
2284 bufferSize = (int)(2U * (unsigned)bufferSize);
2285 } while (bufferSize < neededSize && bufferSize > 0);
2286 if (bufferSize <= 0) {
2287 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2288 return NULL;
2289 }
2290 newBuf = (char *)MALLOC(parser, bufferSize);
2291 if (newBuf == 0) {
2292 parser->m_errorCode = XML_ERROR_NO_MEMORY;
2293 return NULL;
2294 }
2295 parser->m_bufferLim = newBuf + bufferSize;
2296 #if XML_CONTEXT_BYTES > 0
2297 if (parser->m_bufferPtr) {
2298 memcpy(newBuf, &parser->m_bufferPtr[-keep],
2299 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2300 + keep);
2301 FREE(parser, parser->m_buffer);
2302 parser->m_buffer = newBuf;
2303 parser->m_bufferEnd
2304 = parser->m_buffer
2305 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
2306 + keep;
2307 parser->m_bufferPtr = parser->m_buffer + keep;
2308 } else {
2309 /* This must be a brand new buffer with no data in it yet */
2310 parser->m_bufferEnd = newBuf;
2311 parser->m_bufferPtr = parser->m_buffer = newBuf;
2312 }
2313 #else
2314 if (parser->m_bufferPtr) {
2315 memcpy(newBuf, parser->m_bufferPtr,
2316 EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
2317 FREE(parser, parser->m_buffer);
2318 parser->m_bufferEnd
2319 = newBuf
2320 + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
2321 } else {
2322 /* This must be a brand new buffer with no data in it yet */
2323 parser->m_bufferEnd = newBuf;
2324 }
2325 parser->m_bufferPtr = parser->m_buffer = newBuf;
2326 #endif /* XML_CONTEXT_BYTES > 0 */
2327 }
2328 parser->m_eventPtr = parser->m_eventEndPtr = NULL;
2329 parser->m_positionPtr = NULL;
2330 }
2331 return parser->m_bufferEnd;
2332 }
2333
2334 static void
2335 triggerReenter(XML_Parser parser) {
2336 parser->m_reenter = XML_TRUE;
2337 }
2338
2339 enum XML_Status XMLCALL
2340 XML_StopParser(XML_Parser parser, XML_Bool resumable) {
2341 if (parser == NULL)
2342 return XML_STATUS_ERROR;
2343 switch (parser->m_parsingStatus.parsing) {
2344 case XML_INITIALIZED:
2345 parser->m_errorCode = XML_ERROR_NOT_STARTED;
2346 return XML_STATUS_ERROR;
2347 case XML_SUSPENDED:
2348 if (resumable) {
2349 parser->m_errorCode = XML_ERROR_SUSPENDED;
2350 return XML_STATUS_ERROR;
2351 }
2352 parser->m_parsingStatus.parsing = XML_FINISHED;
2353 break;
2354 case XML_FINISHED:
2355 parser->m_errorCode = XML_ERROR_FINISHED;
2356 return XML_STATUS_ERROR;
2357 case XML_PARSING:
2358 if (resumable) {
2359 #ifdef XML_DTD
2360 if (parser->m_isParamEntity) {
2361 parser->m_errorCode = XML_ERROR_SUSPEND_PE;
2362 return XML_STATUS_ERROR;
2363 }
2364 #endif
2365 parser->m_parsingStatus.parsing = XML_SUSPENDED;
2366 } else
2367 parser->m_parsingStatus.parsing = XML_FINISHED;
2368 break;
2369 default:
2370 assert(0);
2371 }
2372 return XML_STATUS_OK;
2373 }
2374
2375 enum XML_Status XMLCALL
2376 XML_ResumeParser(XML_Parser parser) {
2377 enum XML_Status result = XML_STATUS_OK;
2378
2379 if (parser == NULL)
2380 return XML_STATUS_ERROR;
2381 if (parser->m_parsingStatus.parsing != XML_SUSPENDED) {
2382 parser->m_errorCode = XML_ERROR_NOT_SUSPENDED;
2383 return XML_STATUS_ERROR;
2384 }
2385 parser->m_parsingStatus.parsing = XML_PARSING;
2386
2387 parser->m_errorCode = callProcessor(
2388 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
2389
2390 if (parser->m_errorCode != XML_ERROR_NONE) {
2391 parser->m_eventEndPtr = parser->m_eventPtr;
2392 parser->m_processor = errorProcessor;
2393 return XML_STATUS_ERROR;
2394 } else {
2395 switch (parser->m_parsingStatus.parsing) {
2396 case XML_SUSPENDED:
2397 result = XML_STATUS_SUSPENDED;
2398 break;
2399 case XML_INITIALIZED:
2400 case XML_PARSING:
2401 if (parser->m_parsingStatus.finalBuffer) {
2402 parser->m_parsingStatus.parsing = XML_FINISHED;
2403 return result;
2404 }
2405 default:;
2406 }
2407 }
2408
2409 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2410 parser->m_bufferPtr, &parser->m_position);
2411 parser->m_positionPtr = parser->m_bufferPtr;
2412 return result;
2413 }
2414
2415 void XMLCALL
2416 XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) {
2417 if (parser == NULL)
2418 return;
2419 assert(status != NULL);
2420 *status = parser->m_parsingStatus;
2421 }
2422
2423 enum XML_Error XMLCALL
2424 XML_GetErrorCode(XML_Parser parser) {
2425 if (parser == NULL)
2426 return XML_ERROR_INVALID_ARGUMENT;
2427 return parser->m_errorCode;
2428 }
2429
2430 XML_Index XMLCALL
2431 XML_GetCurrentByteIndex(XML_Parser parser) {
2432 if (parser == NULL)
2433 return -1;
2434 if (parser->m_eventPtr)
2435 return (XML_Index)(parser->m_parseEndByteIndex
2436 - (parser->m_parseEndPtr - parser->m_eventPtr));
2437 return -1;
2438 }
2439
2440 int XMLCALL
2441 XML_GetCurrentByteCount(XML_Parser parser) {
2442 if (parser == NULL)
2443 return 0;
2444 if (parser->m_eventEndPtr && parser->m_eventPtr)
2445 return (int)(parser->m_eventEndPtr - parser->m_eventPtr);
2446 return 0;
2447 }
2448
2449 const char *XMLCALL
2450 XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
2451 #if XML_CONTEXT_BYTES > 0
2452 if (parser == NULL)
2453 return NULL;
2454 if (parser->m_eventPtr && parser->m_buffer) {
2455 if (offset != NULL)
2456 *offset = (int)(parser->m_eventPtr - parser->m_buffer);
2457 if (size != NULL)
2458 *size = (int)(parser->m_bufferEnd - parser->m_buffer);
2459 return parser->m_buffer;
2460 }
2461 #else
2462 (void)parser;
2463 (void)offset;
2464 (void)size;
2465 #endif /* XML_CONTEXT_BYTES > 0 */
2466 return (const char *)0;
2467 }
2468
2469 XML_Size XMLCALL
2470 XML_GetCurrentLineNumber(XML_Parser parser) {
2471 if (parser == NULL)
2472 return 0;
2473 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2474 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2475 parser->m_eventPtr, &parser->m_position);
2476 parser->m_positionPtr = parser->m_eventPtr;
2477 }
2478 return parser->m_position.lineNumber + 1;
2479 }
2480
2481 XML_Size XMLCALL
2482 XML_GetCurrentColumnNumber(XML_Parser parser) {
2483 if (parser == NULL)
2484 return 0;
2485 if (parser->m_eventPtr && parser->m_eventPtr >= parser->m_positionPtr) {
2486 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
2487 parser->m_eventPtr, &parser->m_position);
2488 parser->m_positionPtr = parser->m_eventPtr;
2489 }
2490 return parser->m_position.columnNumber;
2491 }
2492
2493 void XMLCALL
2494 XML_FreeContentModel(XML_Parser parser, XML_Content *model) {
2495 if (parser != NULL)
2496 FREE(parser, model);
2497 }
2498
2499 void *XMLCALL
2500 XML_MemMalloc(XML_Parser parser, size_t size) {
2501 if (parser == NULL)
2502 return NULL;
2503 return MALLOC(parser, size);
2504 }
2505
2506 void *XMLCALL
2507 XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) {
2508 if (parser == NULL)
2509 return NULL;
2510 return REALLOC(parser, ptr, size);
2511 }
2512
2513 void XMLCALL
2514 XML_MemFree(XML_Parser parser, void *ptr) {
2515 if (parser != NULL)
2516 FREE(parser, ptr);
2517 }
2518
2519 void XMLCALL
2520 XML_DefaultCurrent(XML_Parser parser) {
2521 if (parser == NULL)
2522 return;
2523 if (parser->m_defaultHandler) {
2524 if (parser->m_openInternalEntities)
2525 reportDefault(parser, parser->m_internalEncoding,
2526 parser->m_openInternalEntities->internalEventPtr,
2527 parser->m_openInternalEntities->internalEventEndPtr);
2528 else
2529 reportDefault(parser, parser->m_encoding, parser->m_eventPtr,
2530 parser->m_eventEndPtr);
2531 }
2532 }
2533
2534 const XML_LChar *XMLCALL
2535 XML_ErrorString(enum XML_Error code) {
2536 switch (code) {
2537 case XML_ERROR_NONE:
2538 return NULL;
2539 case XML_ERROR_NO_MEMORY:
2540 return XML_L("out of memory");
2541 case XML_ERROR_SYNTAX:
2542 return XML_L("syntax error");
2543 case XML_ERROR_NO_ELEMENTS:
2544 return XML_L("no element found");
2545 case XML_ERROR_INVALID_TOKEN:
2546 return XML_L("not well-formed (invalid token)");
2547 case XML_ERROR_UNCLOSED_TOKEN:
2548 return XML_L("unclosed token");
2549 case XML_ERROR_PARTIAL_CHAR:
2550 return XML_L("partial character");
2551 case XML_ERROR_TAG_MISMATCH:
2552 return XML_L("mismatched tag");
2553 case XML_ERROR_DUPLICATE_ATTRIBUTE:
2554 return XML_L("duplicate attribute");
2555 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
2556 return XML_L("junk after document element");
2557 case XML_ERROR_PARAM_ENTITY_REF:
2558 return XML_L("illegal parameter entity reference");
2559 case XML_ERROR_UNDEFINED_ENTITY:
2560 return XML_L("undefined entity");
2561 case XML_ERROR_RECURSIVE_ENTITY_REF:
2562 return XML_L("recursive entity reference");
2563 case XML_ERROR_ASYNC_ENTITY:
2564 return XML_L("asynchronous entity");
2565 case XML_ERROR_BAD_CHAR_REF:
2566 return XML_L("reference to invalid character number");
2567 case XML_ERROR_BINARY_ENTITY_REF:
2568 return XML_L("reference to binary entity");
2569 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
2570 return XML_L("reference to external entity in attribute");
2571 case XML_ERROR_MISPLACED_XML_PI:
2572 return XML_L("XML or text declaration not at start of entity");
2573 case XML_ERROR_UNKNOWN_ENCODING:
2574 return XML_L("unknown encoding");
2575 case XML_ERROR_INCORRECT_ENCODING:
2576 return XML_L("encoding specified in XML declaration is incorrect");
2577 case XML_ERROR_UNCLOSED_CDATA_SECTION:
2578 return XML_L("unclosed CDATA section");
2579 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
2580 return XML_L("error in processing external entity reference");
2581 case XML_ERROR_NOT_STANDALONE:
2582 return XML_L("document is not standalone");
2583 case XML_ERROR_UNEXPECTED_STATE:
2584 return XML_L("unexpected parser state - please send a bug report");
2585 case XML_ERROR_ENTITY_DECLARED_IN_PE:
2586 return XML_L("entity declared in parameter entity");
2587 case XML_ERROR_FEATURE_REQUIRES_XML_DTD:
2588 return XML_L("requested feature requires XML_DTD support in Expat");
2589 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING:
2590 return XML_L("cannot change setting once parsing has begun");
2591 /* Added in 1.95.7. */
2592 case XML_ERROR_UNBOUND_PREFIX:
2593 return XML_L("unbound prefix");
2594 /* Added in 1.95.8. */
2595 case XML_ERROR_UNDECLARING_PREFIX:
2596 return XML_L("must not undeclare prefix");
2597 case XML_ERROR_INCOMPLETE_PE:
2598 return XML_L("incomplete markup in parameter entity");
2599 case XML_ERROR_XML_DECL:
2600 return XML_L("XML declaration not well-formed");
2601 case XML_ERROR_TEXT_DECL:
2602 return XML_L("text declaration not well-formed");
2603 case XML_ERROR_PUBLICID:
2604 return XML_L("illegal character(s) in public id");
2605 case XML_ERROR_SUSPENDED:
2606 return XML_L("parser suspended");
2607 case XML_ERROR_NOT_SUSPENDED:
2608 return XML_L("parser not suspended");
2609 case XML_ERROR_ABORTED:
2610 return XML_L("parsing aborted");
2611 case XML_ERROR_FINISHED:
2612 return XML_L("parsing finished");
2613 case XML_ERROR_SUSPEND_PE:
2614 return XML_L("cannot suspend in external parameter entity");
2615 /* Added in 2.0.0. */
2616 case XML_ERROR_RESERVED_PREFIX_XML:
2617 return XML_L(
2618 "reserved prefix (xml) must not be undeclared or bound to another namespace name");
2619 case XML_ERROR_RESERVED_PREFIX_XMLNS:
2620 return XML_L("reserved prefix (xmlns) must not be declared or undeclared");
2621 case XML_ERROR_RESERVED_NAMESPACE_URI:
2622 return XML_L(
2623 "prefix must not be bound to one of the reserved namespace names");
2624 /* Added in 2.2.5. */
2625 case XML_ERROR_INVALID_ARGUMENT: /* Constant added in 2.2.1, already */
2626 return XML_L("invalid argument");
2627 /* Added in 2.3.0. */
2628 case XML_ERROR_NO_BUFFER:
2629 return XML_L(
2630 "a successful prior call to function XML_GetBuffer is required");
2631 /* Added in 2.4.0. */
2632 case XML_ERROR_AMPLIFICATION_LIMIT_BREACH:
2633 return XML_L(
2634 "limit on input amplification factor (from DTD and entities) breached");
2635 /* Added in 2.6.4. */
2636 case XML_ERROR_NOT_STARTED:
2637 return XML_L("parser not started");
2638 }
2639 return NULL;
2640 }
2641
2642 const XML_LChar *XMLCALL
2643 XML_ExpatVersion(void) {
2644 /* V1 is used to string-ize the version number. However, it would
2645 string-ize the actual version macro *names* unless we get them
2646 substituted before being passed to V1. CPP is defined to expand
2647 a macro, then rescan for more expansions. Thus, we use V2 to expand
2648 the version macros, then CPP will expand the resulting V1() macro
2649 with the correct numerals. */
2650 /* ### I'm assuming cpp is portable in this respect... */
2651
2652 #define V1(a, b, c) XML_L(#a) XML_L(".") XML_L(#b) XML_L(".") XML_L(#c)
2653 #define V2(a, b, c) XML_L("expat_") V1(a, b, c)
2654
2655 return V2(XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION);
2656
2657 #undef V1
2658 #undef V2
2659 }
2660
2661 XML_Expat_Version XMLCALL
2662 XML_ExpatVersionInfo(void) {
2663 XML_Expat_Version version;
2664
2665 version.major = XML_MAJOR_VERSION;
2666 version.minor = XML_MINOR_VERSION;
2667 version.micro = XML_MICRO_VERSION;
2668
2669 return version;
2670 }
2671
2672 const XML_Feature *XMLCALL
2673 XML_GetFeatureList(void) {
2674 static const XML_Feature features[] = {
2675 {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"),
2676 sizeof(XML_Char)},
2677 {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"),
2678 sizeof(XML_LChar)},
2679 #ifdef XML_UNICODE
2680 {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0},
2681 #endif
2682 #ifdef XML_UNICODE_WCHAR_T
2683 {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0},
2684 #endif
2685 #ifdef XML_DTD
2686 {XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
2687 #endif
2688 #if XML_CONTEXT_BYTES > 0
2689 {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
2690 XML_CONTEXT_BYTES},
2691 #endif
2692 #ifdef XML_MIN_SIZE
2693 {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0},
2694 #endif
2695 #ifdef XML_NS
2696 {XML_FEATURE_NS, XML_L("XML_NS"), 0},
2697 #endif
2698 #ifdef XML_LARGE_SIZE
2699 {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0},
2700 #endif
2701 #ifdef XML_ATTR_INFO
2702 {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
2703 #endif
2704 #if XML_GE == 1
2705 /* Added in Expat 2.4.0 for XML_DTD defined and
2706 * added in Expat 2.6.0 for XML_GE == 1. */
2707 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
2708 XML_L("XML_BLAP_MAX_AMP"),
2709 (long int)
2710 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT},
2711 {XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
2712 XML_L("XML_BLAP_ACT_THRES"),
2713 EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
2714 /* Added in Expat 2.6.0. */
2715 {XML_FEATURE_GE, XML_L("XML_GE"), 0},
2716 #endif
2717 {XML_FEATURE_END, NULL, 0}};
2718
2719 return features;
2720 }
2721
2722 #if XML_GE == 1
2723 XML_Bool XMLCALL
2724 XML_SetBillionLaughsAttackProtectionMaximumAmplification(
2725 XML_Parser parser, float maximumAmplificationFactor) {
2726 if ((parser == NULL) || (parser->m_parentParser != NULL)
2727 || isnan(maximumAmplificationFactor)
2728 || (maximumAmplificationFactor < 1.0f)) {
2729 return XML_FALSE;
2730 }
2731 parser->m_accounting.maximumAmplificationFactor = maximumAmplificationFactor;
2732 return XML_TRUE;
2733 }
2734
2735 XML_Bool XMLCALL
2736 XML_SetBillionLaughsAttackProtectionActivationThreshold(
2737 XML_Parser parser, unsigned long long activationThresholdBytes) {
2738 if ((parser == NULL) || (parser->m_parentParser != NULL)) {
2739 return XML_FALSE;
2740 }
2741 parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
2742 return XML_TRUE;
2743 }
2744 #endif /* XML_GE == 1 */
2745
2746 XML_Bool XMLCALL
2747 XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
2748 if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
2749 parser->m_reparseDeferralEnabled = enabled;
2750 return XML_TRUE;
2751 }
2752 return XML_FALSE;
2753 }
2754
2755 /* Initially tag->rawName always points into the parse buffer;
2756 for those TAG instances opened while the current parse buffer was
2757 processed, and not yet closed, we need to store tag->rawName in a more
2758 permanent location, since the parse buffer is about to be discarded.
2759 */
2760 static XML_Bool
2761 storeRawNames(XML_Parser parser) {
2762 TAG *tag = parser->m_tagStack;
2763 while (tag) {
2764 int bufSize;
2765 int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
2766 size_t rawNameLen;
2767 char *rawNameBuf = tag->buf + nameLen;
2768 /* Stop if already stored. Since m_tagStack is a stack, we can stop
2769 at the first entry that has already been copied; everything
2770 below it in the stack is already been accounted for in a
2771 previous call to this function.
2772 */
2773 if (tag->rawName == rawNameBuf)
2774 break;
2775 /* For reuse purposes we need to ensure that the
2776 size of tag->buf is a multiple of sizeof(XML_Char).
2777 */
2778 rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
2779 /* Detect and prevent integer overflow. */
2780 if (rawNameLen > (size_t)INT_MAX - nameLen)
2781 return XML_FALSE;
2782 bufSize = nameLen + (int)rawNameLen;
2783 if (bufSize > tag->bufEnd - tag->buf) {
2784 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
2785 if (temp == NULL)
2786 return XML_FALSE;
2787 /* if tag->name.str points to tag->buf (only when namespace
2788 processing is off) then we have to update it
2789 */
2790 if (tag->name.str == (XML_Char *)tag->buf)
2791 tag->name.str = (XML_Char *)temp;
2792 /* if tag->name.localPart is set (when namespace processing is on)
2793 then update it as well, since it will always point into tag->buf
2794 */
2795 if (tag->name.localPart)
2796 tag->name.localPart
2797 = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf);
2798 tag->buf = temp;
2799 tag->bufEnd = temp + bufSize;
2800 rawNameBuf = temp + nameLen;
2801 }
2802 memcpy(rawNameBuf, tag->rawName, tag->rawNameLength);
2803 tag->rawName = rawNameBuf;
2804 tag = tag->parent;
2805 }
2806 return XML_TRUE;
2807 }
2808
2809 static enum XML_Error PTRCALL
2810 contentProcessor(XML_Parser parser, const char *start, const char *end,
2811 const char **endPtr) {
2812 enum XML_Error result = doContent(
2813 parser, parser->m_parentParser ? 1 : 0, parser->m_encoding, start, end,
2814 endPtr, (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2815 XML_ACCOUNT_DIRECT);
2816 if (result == XML_ERROR_NONE) {
2817 if (! storeRawNames(parser))
2818 return XML_ERROR_NO_MEMORY;
2819 }
2820 return result;
2821 }
2822
2823 static enum XML_Error PTRCALL
2824 externalEntityInitProcessor(XML_Parser parser, const char *start,
2825 const char *end, const char **endPtr) {
2826 enum XML_Error result = initializeEncoding(parser);
2827 if (result != XML_ERROR_NONE)
2828 return result;
2829 parser->m_processor = externalEntityInitProcessor2;
2830 return externalEntityInitProcessor2(parser, start, end, endPtr);
2831 }
2832
2833 static enum XML_Error PTRCALL
2834 externalEntityInitProcessor2(XML_Parser parser, const char *start,
2835 const char *end, const char **endPtr) {
2836 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2837 int tok = XmlContentTok(parser->m_encoding, start, end, &next);
2838 switch (tok) {
2839 case XML_TOK_BOM:
2840 #if XML_GE == 1
2841 if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
2842 XML_ACCOUNT_DIRECT)) {
2843 accountingOnAbort(parser);
2844 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2845 }
2846 #endif /* XML_GE == 1 */
2847
2848 /* If we are at the end of the buffer, this would cause the next stage,
2849 i.e. externalEntityInitProcessor3, to pass control directly to
2850 doContent (by detecting XML_TOK_NONE) without processing any xml text
2851 declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent.
2852 */
2853 if (next == end && ! parser->m_parsingStatus.finalBuffer) {
2854 *endPtr = next;
2855 return XML_ERROR_NONE;
2856 }
2857 start = next;
2858 break;
2859 case XML_TOK_PARTIAL:
2860 if (! parser->m_parsingStatus.finalBuffer) {
2861 *endPtr = start;
2862 return XML_ERROR_NONE;
2863 }
2864 parser->m_eventPtr = start;
2865 return XML_ERROR_UNCLOSED_TOKEN;
2866 case XML_TOK_PARTIAL_CHAR:
2867 if (! parser->m_parsingStatus.finalBuffer) {
2868 *endPtr = start;
2869 return XML_ERROR_NONE;
2870 }
2871 parser->m_eventPtr = start;
2872 return XML_ERROR_PARTIAL_CHAR;
2873 }
2874 parser->m_processor = externalEntityInitProcessor3;
2875 return externalEntityInitProcessor3(parser, start, end, endPtr);
2876 }
2877
2878 static enum XML_Error PTRCALL
2879 externalEntityInitProcessor3(XML_Parser parser, const char *start,
2880 const char *end, const char **endPtr) {
2881 int tok;
2882 const char *next = start; /* XmlContentTok doesn't always set the last arg */
2883 parser->m_eventPtr = start;
2884 tok = XmlContentTok(parser->m_encoding, start, end, &next);
2885 /* Note: These bytes are accounted later in:
2886 - processXmlDecl
2887 - externalEntityContentProcessor
2888 */
2889 parser->m_eventEndPtr = next;
2890
2891 switch (tok) {
2892 case XML_TOK_XML_DECL: {
2893 enum XML_Error result;
2894 result = processXmlDecl(parser, 1, start, next);
2895 if (result != XML_ERROR_NONE)
2896 return result;
2897 switch (parser->m_parsingStatus.parsing) {
2898 case XML_SUSPENDED:
2899 *endPtr = next;
2900 return XML_ERROR_NONE;
2901 case XML_FINISHED:
2902 return XML_ERROR_ABORTED;
2903 case XML_PARSING:
2904 if (parser->m_reenter) {
2905 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
2906 }
2907 /* Fall through */
2908 default:
2909 start = next;
2910 }
2911 } break;
2912 case XML_TOK_PARTIAL:
2913 if (! parser->m_parsingStatus.finalBuffer) {
2914 *endPtr = start;
2915 return XML_ERROR_NONE;
2916 }
2917 return XML_ERROR_UNCLOSED_TOKEN;
2918 case XML_TOK_PARTIAL_CHAR:
2919 if (! parser->m_parsingStatus.finalBuffer) {
2920 *endPtr = start;
2921 return XML_ERROR_NONE;
2922 }
2923 return XML_ERROR_PARTIAL_CHAR;
2924 }
2925 parser->m_processor = externalEntityContentProcessor;
2926 parser->m_tagLevel = 1;
2927 return externalEntityContentProcessor(parser, start, end, endPtr);
2928 }
2929
2930 static enum XML_Error PTRCALL
2931 externalEntityContentProcessor(XML_Parser parser, const char *start,
2932 const char *end, const char **endPtr) {
2933 enum XML_Error result
2934 = doContent(parser, 1, parser->m_encoding, start, end, endPtr,
2935 (XML_Bool)! parser->m_parsingStatus.finalBuffer,
2936 XML_ACCOUNT_ENTITY_EXPANSION);
2937 if (result == XML_ERROR_NONE) {
2938 if (! storeRawNames(parser))
2939 return XML_ERROR_NO_MEMORY;
2940 }
2941 return result;
2942 }
2943
2944 static enum XML_Error
2945 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
2946 const char *s, const char *end, const char **nextPtr,
2947 XML_Bool haveMore, enum XML_Account account) {
2948 /* save one level of indirection */
2949 DTD *const dtd = parser->m_dtd;
2950
2951 const char **eventPP;
2952 const char **eventEndPP;
2953 if (enc == parser->m_encoding) {
2954 eventPP = &parser->m_eventPtr;
2955 eventEndPP = &parser->m_eventEndPtr;
2956 } else {
2957 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
2958 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
2959 }
2960 *eventPP = s;
2961
2962 for (;;) {
2963 const char *next = s; /* XmlContentTok doesn't always set the last arg */
2964 int tok = XmlContentTok(enc, s, end, &next);
2965 #if XML_GE == 1
2966 const char *accountAfter
2967 = ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
2968 ? (haveMore ? s /* i.e. 0 bytes */ : end)
2969 : next;
2970 if (! accountingDiffTolerated(parser, tok, s, accountAfter, __LINE__,
2971 account)) {
2972 accountingOnAbort(parser);
2973 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
2974 }
2975 #endif
2976 *eventEndPP = next;
2977 switch (tok) {
2978 case XML_TOK_TRAILING_CR:
2979 if (haveMore) {
2980 *nextPtr = s;
2981 return XML_ERROR_NONE;
2982 }
2983 *eventEndPP = end;
2984 if (parser->m_characterDataHandler) {
2985 XML_Char c = 0xA;
2986 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
2987 } else if (parser->m_defaultHandler)
2988 reportDefault(parser, enc, s, end);
2989 /* We are at the end of the final buffer, should we check for
2990 XML_SUSPENDED, XML_FINISHED?
2991 */
2992 if (startTagLevel == 0)
2993 return XML_ERROR_NO_ELEMENTS;
2994 if (parser->m_tagLevel != startTagLevel)
2995 return XML_ERROR_ASYNC_ENTITY;
2996 *nextPtr = end;
2997 return XML_ERROR_NONE;
2998 case XML_TOK_NONE:
2999 if (haveMore) {
3000 *nextPtr = s;
3001 return XML_ERROR_NONE;
3002 }
3003 if (startTagLevel > 0) {
3004 if (parser->m_tagLevel != startTagLevel)
3005 return XML_ERROR_ASYNC_ENTITY;
3006 *nextPtr = s;
3007 return XML_ERROR_NONE;
3008 }
3009 return XML_ERROR_NO_ELEMENTS;
3010 case XML_TOK_INVALID:
3011 *eventPP = next;
3012 return XML_ERROR_INVALID_TOKEN;
3013 case XML_TOK_PARTIAL:
3014 if (haveMore) {
3015 *nextPtr = s;
3016 return XML_ERROR_NONE;
3017 }
3018 return XML_ERROR_UNCLOSED_TOKEN;
3019 case XML_TOK_PARTIAL_CHAR:
3020 if (haveMore) {
3021 *nextPtr = s;
3022 return XML_ERROR_NONE;
3023 }
3024 return XML_ERROR_PARTIAL_CHAR;
3025 case XML_TOK_ENTITY_REF: {
3026 const XML_Char *name;
3027 ENTITY *entity;
3028 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
3029 enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
3030 if (ch) {
3031 #if XML_GE == 1
3032 /* NOTE: We are replacing 4-6 characters original input for 1 character
3033 * so there is no amplification and hence recording without
3034 * protection. */
3035 accountingDiffTolerated(parser, tok, (char *)&ch,
3036 ((char *)&ch) + sizeof(XML_Char), __LINE__,
3037 XML_ACCOUNT_ENTITY_EXPANSION);
3038 #endif /* XML_GE == 1 */
3039 if (parser->m_characterDataHandler)
3040 parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
3041 else if (parser->m_defaultHandler)
3042 reportDefault(parser, enc, s, next);
3043 break;
3044 }
3045 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
3046 next - enc->minBytesPerChar);
3047 if (! name)
3048 return XML_ERROR_NO_MEMORY;
3049 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
3050 poolDiscard(&dtd->pool);
3051 /* First, determine if a check for an existing declaration is needed;
3052 if yes, check that the entity exists, and that it is internal,
3053 otherwise call the skipped entity or default handler.
3054 */
3055 if (! dtd->hasParamEntityRefs || dtd->standalone) {
3056 if (! entity)
3057 return XML_ERROR_UNDEFINED_ENTITY;
3058 else if (! entity->is_internal)
3059 return XML_ERROR_ENTITY_DECLARED_IN_PE;
3060 } else if (! entity) {
3061 if (parser->m_skippedEntityHandler)
3062 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
3063 else if (parser->m_defaultHandler)
3064 reportDefault(parser, enc, s, next);
3065 break;
3066 }
3067 if (entity->open)
3068 return XML_ERROR_RECURSIVE_ENTITY_REF;
3069 if (entity->notation)
3070 return XML_ERROR_BINARY_ENTITY_REF;
3071 if (entity->textPtr) {
3072 enum XML_Error result;
3073 if (! parser->m_defaultExpandInternalEntities) {
3074 if (parser->m_skippedEntityHandler)
3075 parser->m_skippedEntityHandler(parser->m_handlerArg, entity->name,
3076 0);
3077 else if (parser->m_defaultHandler)
3078 reportDefault(parser, enc, s, next);
3079 break;
3080 }
3081 result = processEntity(parser, entity, XML_FALSE, ENTITY_INTERNAL);
3082 if (result != XML_ERROR_NONE)
3083 return result;
3084 } else if (parser->m_externalEntityRefHandler) {
3085 const XML_Char *context;
3086 entity->open = XML_TRUE;
3087 context = getContext(parser);
3088 entity->open = XML_FALSE;
3089 if (! context)
3090 return XML_ERROR_NO_MEMORY;
3091 if (! parser->m_externalEntityRefHandler(
3092 parser->m_externalEntityRefHandlerArg, context, entity->base,
3093 entity->systemId, entity->publicId))
3094 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
3095 poolDiscard(&parser->m_tempPool);
3096 } else if (parser->m_defaultHandler)
3097 reportDefault(parser, enc, s, next);
3098 break;
3099 }
3100 case XML_TOK_START_TAG_NO_ATTS:
3101 /* fall through */
3102 case XML_TOK_START_TAG_WITH_ATTS: {
3103 TAG *tag;
3104 enum XML_Error result;
3105 XML_Char *toPtr;
3106 if (parser->m_freeTagList) {
3107 tag = parser->m_freeTagList;
3108 parser->m_freeTagList = parser->m_freeTagList->parent;
3109 } else {
3110 tag = (TAG *)MALLOC(parser, sizeof(TAG));
3111 if (! tag)
3112 return XML_ERROR_NO_MEMORY;
3113 tag->buf = (char *)MALLOC(parser, INIT_TAG_BUF_SIZE);
3114 if (! tag->buf) {
3115 FREE(parser, tag);
3116 return XML_ERROR_NO_MEMORY;
3117 }
3118 tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
3119 }
3120 tag->bindings = NULL;
3121 tag->parent = parser->m_tagStack;
3122 parser->m_tagStack = tag;
3123 tag->name.localPart = NULL;
3124 tag->name.prefix = NULL;
3125 tag->rawName = s + enc->minBytesPerChar;
3126 tag->rawNameLength = XmlNameLength(enc, tag->rawName);
3127 ++parser->m_tagLevel;
3128 {
3129 const char *rawNameEnd = tag->rawName + tag->rawNameLength;
3130 const char *fromPtr = tag->rawName;
3131 toPtr = (XML_Char *)tag->buf;
3132 for (;;) {
3133 int bufSize;
3134 int convLen;
3135 const enum XML_Convert_Result convert_res
3136 = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr,
3137 (ICHAR *)tag->bufEnd - 1);
3138 convLen = (int)(toPtr - (XML_Char *)tag->buf);
3139 if ((fromPtr >= rawNameEnd)
3140 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) {
3141 tag->name.strLen = convLen;
3142 break;
3143 }
3144 bufSize = (int)(tag->bufEnd - tag->buf) << 1;
3145 {
3146 char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
3147 if (temp == NULL)
3148 return XML_ERROR_NO_MEMORY;
3149 tag->buf = temp;
3150 tag->bufEnd = temp + bufSize;
3151 toPtr = (XML_Char *)temp + convLen;
3152 }
3153 }
3154 }
3155 tag->name.str = (XML_Char *)tag->buf;
3156 *toPtr = XML_T('\0');
3157 result
3158 = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account);
3159 if (result)
3160 return result;
3161 if (parser->m_startElementHandler)
3162 parser->m_startElementHandler(parser->m_handlerArg, tag->name.str,
3163 (const XML_Char **)parser->m_atts);
3164 else if (parser->m_defaultHandler)
3165 reportDefault(parser, enc, s, next);
3166 poolClear(&parser->m_tempPool);
3167 break;
3168 }
3169 case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
3170 /* fall through */
3171 case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: {
3172 const char *rawName = s + enc->minBytesPerChar;
3173 enum XML_Error result;
3174 BINDING *bindings = NULL;
3175 XML_Bool noElmHandlers = XML_TRUE;
3176 TAG_NAME name;
3177 name.str = poolStoreString(&parser->m_tempPool, enc, rawName,
3178 rawName + XmlNameLength(enc, rawName));
3179 if (! name.str)
3180 return XML_ERROR_NO_MEMORY;
3181 poolFinish(&parser->m_tempPool);
3182 result = storeAtts(parser, enc, s, &name, &bindings,
3183 XML_ACCOUNT_NONE /* token spans whole start tag */);
3184 if (result != XML_ERROR_NONE) {
3185 freeBindings(parser, bindings);
3186 return result;
3187 }
3188 poolFinish(&parser->m_tempPool);
3189 if (parser->m_startElementHandler) {
3190 parser->m_startElementHandler(parser->m_handlerArg, name.str,
3191 (const XML_Char **)parser->m_atts);
3192 noElmHandlers = XML_FALSE;
3193 }
3194 if (parser->m_endElementHandler) {
3195 if (parser->m_startElementHandler)
3196 *eventPP = *eventEndPP;
3197 parser->m_endElementHandler(parser->m_handlerArg, name.str);
3198 noElmHandlers = XML_FALSE;
3199 }
3200 if (noElmHandlers && parser->m_defaultHandler)
3201 reportDefault(parser, enc, s, next);
3202 poolClear(&parser->m_tempPool);
3203 freeBindings(parser, bindings);
3204 }
3205 if ((parser->m_tagLevel == 0)
3206 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3207 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3208 || (parser->m_parsingStatus.parsing == XML_PARSING
3209 && parser->m_reenter))
3210 parser->m_processor = epilogProcessor;
3211 else
3212 return epilogProcessor(parser, next, end, nextPtr);
3213 }
3214 break;
3215 case XML_TOK_END_TAG:
3216 if (parser->m_tagLevel == startTagLevel)
3217 return XML_ERROR_ASYNC_ENTITY;
3218 else {
3219 int len;
3220 const char *rawName;
3221 TAG *tag = parser->m_tagStack;
3222 rawName = s + enc->minBytesPerChar * 2;
3223 len = XmlNameLength(enc, rawName);
3224 if (len != tag->rawNameLength
3225 || memcmp(tag->rawName, rawName, len) != 0) {
3226 *eventPP = rawName;
3227 return XML_ERROR_TAG_MISMATCH;
3228 }
3229 parser->m_tagStack = tag->parent;
3230 tag->parent = parser->m_freeTagList;
3231 parser->m_freeTagList = tag;
3232 --parser->m_tagLevel;
3233 if (parser->m_endElementHandler) {
3234 const XML_Char *localPart;
3235 const XML_Char *prefix;
3236 XML_Char *uri;
3237 localPart = tag->name.localPart;
3238 if (parser->m_ns && localPart) {
3239 /* localPart and prefix may have been overwritten in
3240 tag->name.str, since this points to the binding->uri
3241 buffer which gets reused; so we have to add them again
3242 */
3243 uri = (XML_Char *)tag->name.str + tag->name.uriLen;
3244 /* don't need to check for space - already done in storeAtts() */
3245 while (*localPart)
3246 *uri++ = *localPart++;
3247 prefix = tag->name.prefix;
3248 if (parser->m_ns_triplets && prefix) {
3249 *uri++ = parser->m_namespaceSeparator;
3250 while (*prefix)
3251 *uri++ = *prefix++;
3252 }
3253 *uri = XML_T('\0');
3254 }
3255 parser->m_endElementHandler(parser->m_handlerArg, tag->name.str);
3256 } else if (parser->m_defaultHandler)
3257 reportDefault(parser, enc, s, next);
3258 while (tag->bindings) {
3259 BINDING *b = tag->bindings;
3260 if (parser->m_endNamespaceDeclHandler)
3261 parser->m_endNamespaceDeclHandler(parser->m_handlerArg,
3262 b->prefix->name);
3263 tag->bindings = tag->bindings->nextTagBinding;
3264 b->nextTagBinding = parser->m_freeBindingList;
3265 parser->m_freeBindingList = b;
3266 b->prefix->binding = b->prevPrefixBinding;
3267 }
3268 if ((parser->m_tagLevel == 0)
3269 && (parser->m_parsingStatus.parsing != XML_FINISHED)) {
3270 if (parser->m_parsingStatus.parsing == XML_SUSPENDED
3271 || (parser->m_parsingStatus.parsing == XML_PARSING
3272 && parser->m_reenter))
3273 parser->m_processor = epilogProcessor;
3274 else
3275 return epilogProcessor(parser, next, end, nextPtr);
3276 }
3277 }
3278 break;
3279 case XML_TOK_CHAR_REF: {
3280 int n = XmlCharRefNumber(enc, s);
3281 if (n < 0)
3282 return XML_ERROR_BAD_CHAR_REF;
3283 if (parser->m_characterDataHandler) {
3284 XML_Char buf[XML_ENCODE_MAX];
3285 parser->m_characterDataHandler(parser->m_handlerArg, buf,
3286 XmlEncode(n, (ICHAR *)buf));
3287 } else if (parser->m_defaultHandler)
3288 reportDefault(parser, enc, s, next);
3289 } break;
3290 case XML_TOK_XML_DECL:
3291 return XML_ERROR_MISPLACED_XML_PI;
3292 case XML_TOK_DATA_NEWLINE:
3293 if (parser->m_characterDataHandler) {
3294 XML_Char c = 0xA;
3295 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
3296 } else if (parser->m_defaultHandler)
3297 reportDefault(parser, enc, s, next);
3298 break;
3299 case XML_TOK_CDATA_SECT_OPEN: {
3300 enum XML_Error result;
3301 if (parser->m_startCdataSectionHandler)
3302 parser->m_startCdataSectionHandler(parser->m_handlerArg);
3303 /* BEGIN disabled code */
3304 /* Suppose you doing a transformation on a document that involves
3305 changing only the character data. You set up a defaultHandler
3306 and a characterDataHandler. The defaultHandler simply copies
3307 characters through. The characterDataHandler does the
3308 transformation and writes the characters out escaping them as
3309 necessary. This case will fail to work if we leave out the
3310 following two lines (because & and < inside CDATA sections will
3311 be incorrectly escaped).
3312
3313 However, now we have a start/endCdataSectionHandler, so it seems
3314 easier to let the user deal with this.
3315 */
3316 else if ((0) && parser->m_characterDataHandler)
3317 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3318 0);
3319 /* END disabled code */
3320 else if (parser->m_defaultHandler)
3321 reportDefault(parser, enc, s, next);
3322 result
3323 = doCdataSection(parser, enc, &next, end, nextPtr, haveMore, account);
3324 if (result != XML_ERROR_NONE)
3325 return result;
3326 else if (! next) {
3327 parser->m_processor = cdataSectionProcessor;
3328 return result;
3329 }
3330 } break;
3331 case XML_TOK_TRAILING_RSQB:
3332 if (haveMore) {
3333 *nextPtr = s;
3334 return XML_ERROR_NONE;
3335 }
3336 if (parser->m_characterDataHandler) {
3337 if (MUST_CONVERT(enc, s)) {
3338 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3339 XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3340 parser->m_characterDataHandler(
3341 parser->m_handlerArg, parser->m_dataBuf,
3342 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3343 } else
3344 parser->m_characterDataHandler(
3345 parser->m_handlerArg, (const XML_Char *)s,
3346 (int)((const XML_Char *)end - (const XML_Char *)s));
3347 } else if (parser->m_defaultHandler)
3348 reportDefault(parser, enc, s, end);
3349 /* We are at the end of the final buffer, should we check for
3350 XML_SUSPENDED, XML_FINISHED?
3351 */
3352 if (startTagLevel == 0) {
3353 *eventPP = end;
3354 return XML_ERROR_NO_ELEMENTS;
3355 }
3356 if (parser->m_tagLevel != startTagLevel) {
3357 *eventPP = end;
3358 return XML_ERROR_ASYNC_ENTITY;
3359 }
3360 *nextPtr = end;
3361 return XML_ERROR_NONE;
3362 case XML_TOK_DATA_CHARS: {
3363 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
3364 if (charDataHandler) {
3365 if (MUST_CONVERT(enc, s)) {
3366 for (;;) {
3367 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
3368 const enum XML_Convert_Result convert_res = XmlConvert(
3369 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
3370 *eventEndPP = s;
3371 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
3372 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
3373 if ((convert_res == XML_CONVERT_COMPLETED)
3374 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
3375 break;
3376 *eventPP = s;
3377 }
3378 } else
3379 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
3380 (int)((const XML_Char *)next - (const XML_Char *)s));
3381 } else if (parser->m_defaultHandler)
3382 reportDefault(parser, enc, s, next);
3383 } break;
3384 case XML_TOK_PI:
3385 if (! reportProcessingInstruction(parser, enc, s, next))
3386 return XML_ERROR_NO_MEMORY;
3387 break;
3388 case XML_TOK_COMMENT:
3389 if (! reportComment(parser, enc, s, next))
3390 return XML_ERROR_NO_MEMORY;
3391 break;
3392 default:
3393 /* All of the tokens produced by XmlContentTok() have their own
3394 * explicit cases, so this default is not strictly necessary.
3395 * However it is a useful safety net, so we retain the code and
3396 * simply exclude it from the coverage tests.
3397 *
3398 * LCOV_EXCL_START
3399 */
3400 if (parser->m_defaultHandler)
3401 reportDefault(parser, enc, s, next);
3402 break;
3403 /* LCOV_EXCL_STOP */
3404 }
3405 *eventPP = s = next;
3406 switch (parser->m_parsingStatus.parsing) {
3407 case XML_SUSPENDED:
3408 *nextPtr = next;
3409 return XML_ERROR_NONE;
3410 case XML_FINISHED:
3411 return XML_ERROR_ABORTED;
3412 case XML_PARSING:
3413 if (parser->m_reenter) {
3414 *nextPtr = next;
3415 return XML_ERROR_NONE;
3416 }
3417 /* Fall through */
3418 default:;
3419 }
3420 }
3421 /* not reached */
3422 }
3423
3424 /* This function does not call free() on the allocated memory, merely
3425 * moving it to the parser's m_freeBindingList where it can be freed or
3426 * reused as appropriate.
3427 */
3428 static void
3429 freeBindings(XML_Parser parser, BINDING *bindings) {
3430 while (bindings) {
3431 BINDING *b = bindings;
3432
3433 /* m_startNamespaceDeclHandler will have been called for this
3434 * binding in addBindings(), so call the end handler now.
3435 */
3436 if (parser->m_endNamespaceDeclHandler)
3437 parser->m_endNamespaceDeclHandler(parser->m_handlerArg, b->prefix->name);
3438
3439 bindings = bindings->nextTagBinding;
3440 b->nextTagBinding = parser->m_freeBindingList;
3441 parser->m_freeBindingList = b;
3442 b->prefix->binding = b->prevPrefixBinding;
3443 }
3444 }
3445
3446 /* Precondition: all arguments must be non-NULL;
3447 Purpose:
3448 - normalize attributes
3449 - check attributes for well-formedness
3450 - generate namespace aware attribute names (URI, prefix)
3451 - build list of attributes for startElementHandler
3452 - default attributes
3453 - process namespace declarations (check and report them)
3454 - generate namespace aware element name (URI, prefix)
3455 */
3456 static enum XML_Error
3457 storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
3458 TAG_NAME *tagNamePtr, BINDING **bindingsPtr,
3459 enum XML_Account account) {
3460 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
3461 ELEMENT_TYPE *elementType;
3462 int nDefaultAtts;
3463 const XML_Char **appAtts; /* the attribute list for the application */
3464 int attIndex = 0;
3465 int prefixLen;
3466 int i;
3467 int n;
3468 XML_Char *uri;
3469 int nPrefixes = 0;
3470 BINDING *binding;
3471 const XML_Char *localPart;
3472
3473 /* lookup the element type name */
3474 elementType
3475 = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str, 0);
3476 if (! elementType) {
3477 const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str);
3478 if (! name)
3479 return XML_ERROR_NO_MEMORY;
3480 elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
3481 sizeof(ELEMENT_TYPE));
3482 if (! elementType)
3483 return XML_ERROR_NO_MEMORY;
3484 if (parser->m_ns && ! setElementTypePrefix(parser, elementType))
3485 return XML_ERROR_NO_MEMORY;
3486 }
3487 nDefaultAtts = elementType->nDefaultAtts;
3488
3489 /* get the attributes from the tokenizer */
3490 n = XmlGetAttributes(enc, attStr, parser->m_attsSize, parser->m_atts);
3491
3492 /* Detect and prevent integer overflow */
3493 if (n > INT_MAX - nDefaultAtts) {
3494 return XML_ERROR_NO_MEMORY;
3495 }
3496
3497 if (n + nDefaultAtts > parser->m_attsSize) {
3498 int oldAttsSize = parser->m_attsSize;
3499 ATTRIBUTE *temp;
3500 #ifdef XML_ATTR_INFO
3501 XML_AttrInfo *temp2;
3502 #endif
3503
3504 /* Detect and prevent integer overflow */
3505 if ((nDefaultAtts > INT_MAX - INIT_ATTS_SIZE)
3506 || (n > INT_MAX - (nDefaultAtts + INIT_ATTS_SIZE))) {
3507 return XML_ERROR_NO_MEMORY;
3508 }
3509
3510 parser->m_attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
3511
3512 /* Detect and prevent integer overflow.
3513 * The preprocessor guard addresses the "always false" warning
3514 * from -Wtype-limits on platforms where
3515 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3516 #if UINT_MAX >= SIZE_MAX
3517 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) {
3518 parser->m_attsSize = oldAttsSize;
3519 return XML_ERROR_NO_MEMORY;
3520 }
3521 #endif
3522
3523 temp = (ATTRIBUTE *)REALLOC(parser, (void *)parser->m_atts,
3524 parser->m_attsSize * sizeof(ATTRIBUTE));
3525 if (temp == NULL) {
3526 parser->m_attsSize = oldAttsSize;
3527 return XML_ERROR_NO_MEMORY;
3528 }
3529 parser->m_atts = temp;
3530 #ifdef XML_ATTR_INFO
3531 /* Detect and prevent integer overflow.
3532 * The preprocessor guard addresses the "always false" warning
3533 * from -Wtype-limits on platforms where
3534 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3535 # if UINT_MAX >= SIZE_MAX
3536 if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) {
3537 parser->m_attsSize = oldAttsSize;
3538 return XML_ERROR_NO_MEMORY;
3539 }
3540 # endif
3541
3542 temp2 = (XML_AttrInfo *)REALLOC(parser, (void *)parser->m_attInfo,
3543 parser->m_attsSize * sizeof(XML_AttrInfo));
3544 if (temp2 == NULL) {
3545 parser->m_attsSize = oldAttsSize;
3546 return XML_ERROR_NO_MEMORY;
3547 }
3548 parser->m_attInfo = temp2;
3549 #endif
3550 if (n > oldAttsSize)
3551 XmlGetAttributes(enc, attStr, n, parser->m_atts);
3552 }
3553
3554 appAtts = (const XML_Char **)parser->m_atts;
3555 for (i = 0; i < n; i++) {
3556 ATTRIBUTE *currAtt = &parser->m_atts[i];
3557 #ifdef XML_ATTR_INFO
3558 XML_AttrInfo *currAttInfo = &parser->m_attInfo[i];
3559 #endif
3560 /* add the name and value to the attribute list */
3561 ATTRIBUTE_ID *attId
3562 = getAttributeId(parser, enc, currAtt->name,
3563 currAtt->name + XmlNameLength(enc, currAtt->name));
3564 if (! attId)
3565 return XML_ERROR_NO_MEMORY;
3566 #ifdef XML_ATTR_INFO
3567 currAttInfo->nameStart
3568 = parser->m_parseEndByteIndex - (parser->m_parseEndPtr - currAtt->name);
3569 currAttInfo->nameEnd
3570 = currAttInfo->nameStart + XmlNameLength(enc, currAtt->name);
3571 currAttInfo->valueStart = parser->m_parseEndByteIndex
3572 - (parser->m_parseEndPtr - currAtt->valuePtr);
3573 currAttInfo->valueEnd = parser->m_parseEndByteIndex
3574 - (parser->m_parseEndPtr - currAtt->valueEnd);
3575 #endif
3576 /* Detect duplicate attributes by their QNames. This does not work when
3577 namespace processing is turned on and different prefixes for the same
3578 namespace are used. For this case we have a check further down.
3579 */
3580 if ((attId->name)[-1]) {
3581 if (enc == parser->m_encoding)
3582 parser->m_eventPtr = parser->m_atts[i].name;
3583 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3584 }
3585 (attId->name)[-1] = 1;
3586 appAtts[attIndex++] = attId->name;
3587 if (! parser->m_atts[i].normalized) {
3588 enum XML_Error result;
3589 XML_Bool isCdata = XML_TRUE;
3590
3591 /* figure out whether declared as other than CDATA */
3592 if (attId->maybeTokenized) {
3593 int j;
3594 for (j = 0; j < nDefaultAtts; j++) {
3595 if (attId == elementType->defaultAtts[j].id) {
3596 isCdata = elementType->defaultAtts[j].isCdata;
3597 break;
3598 }
3599 }
3600 }
3601
3602 /* normalize the attribute value */
3603 result = storeAttributeValue(
3604 parser, enc, isCdata, parser->m_atts[i].valuePtr,
3605 parser->m_atts[i].valueEnd, &parser->m_tempPool, account);
3606 if (result)
3607 return result;
3608 appAtts[attIndex] = poolStart(&parser->m_tempPool);
3609 poolFinish(&parser->m_tempPool);
3610 } else {
3611 /* the value did not need normalizing */
3612 appAtts[attIndex] = poolStoreString(&parser->m_tempPool, enc,
3613 parser->m_atts[i].valuePtr,
3614 parser->m_atts[i].valueEnd);
3615 if (appAtts[attIndex] == 0)
3616 return XML_ERROR_NO_MEMORY;
3617 poolFinish(&parser->m_tempPool);
3618 }
3619 /* handle prefixed attribute names */
3620 if (attId->prefix) {
3621 if (attId->xmlns) {
3622 /* deal with namespace declarations here */
3623 enum XML_Error result = addBinding(parser, attId->prefix, attId,
3624 appAtts[attIndex], bindingsPtr);
3625 if (result)
3626 return result;
3627 --attIndex;
3628 } else {
3629 /* deal with other prefixed names later */
3630 attIndex++;
3631 nPrefixes++;
3632 (attId->name)[-1] = 2;
3633 }
3634 } else
3635 attIndex++;
3636 }
3637
3638 /* set-up for XML_GetSpecifiedAttributeCount and XML_GetIdAttributeIndex */
3639 parser->m_nSpecifiedAtts = attIndex;
3640 if (elementType->idAtt && (elementType->idAtt->name)[-1]) {
3641 for (i = 0; i < attIndex; i += 2)
3642 if (appAtts[i] == elementType->idAtt->name) {
3643 parser->m_idAttIndex = i;
3644 break;
3645 }
3646 } else
3647 parser->m_idAttIndex = -1;
3648
3649 /* do attribute defaulting */
3650 for (i = 0; i < nDefaultAtts; i++) {
3651 const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + i;
3652 if (! (da->id->name)[-1] && da->value) {
3653 if (da->id->prefix) {
3654 if (da->id->xmlns) {
3655 enum XML_Error result = addBinding(parser, da->id->prefix, da->id,
3656 da->value, bindingsPtr);
3657 if (result)
3658 return result;
3659 } else {
3660 (da->id->name)[-1] = 2;
3661 nPrefixes++;
3662 appAtts[attIndex++] = da->id->name;
3663 appAtts[attIndex++] = da->value;
3664 }
3665 } else {
3666 (da->id->name)[-1] = 1;
3667 appAtts[attIndex++] = da->id->name;
3668 appAtts[attIndex++] = da->value;
3669 }
3670 }
3671 }
3672 appAtts[attIndex] = 0;
3673
3674 /* expand prefixed attribute names, check for duplicates,
3675 and clear flags that say whether attributes were specified */
3676 i = 0;
3677 if (nPrefixes) {
3678 int j; /* hash table index */
3679 unsigned long version = parser->m_nsAttsVersion;
3680
3681 /* Detect and prevent invalid shift */
3682 if (parser->m_nsAttsPower >= sizeof(unsigned int) * 8 /* bits per byte */) {
3683 return XML_ERROR_NO_MEMORY;
3684 }
3685
3686 unsigned int nsAttsSize = 1u << parser->m_nsAttsPower;
3687 unsigned char oldNsAttsPower = parser->m_nsAttsPower;
3688 /* size of hash table must be at least 2 * (# of prefixed attributes) */
3689 if ((nPrefixes << 1)
3690 >> parser->m_nsAttsPower) { /* true for m_nsAttsPower = 0 */
3691 NS_ATT *temp;
3692 /* hash table size must also be a power of 2 and >= 8 */
3693 while (nPrefixes >> parser->m_nsAttsPower++)
3694 ;
3695 if (parser->m_nsAttsPower < 3)
3696 parser->m_nsAttsPower = 3;
3697
3698 /* Detect and prevent invalid shift */
3699 if (parser->m_nsAttsPower >= sizeof(nsAttsSize) * 8 /* bits per byte */) {
3700 /* Restore actual size of memory in m_nsAtts */
3701 parser->m_nsAttsPower = oldNsAttsPower;
3702 return XML_ERROR_NO_MEMORY;
3703 }
3704
3705 nsAttsSize = 1u << parser->m_nsAttsPower;
3706
3707 /* Detect and prevent integer overflow.
3708 * The preprocessor guard addresses the "always false" warning
3709 * from -Wtype-limits on platforms where
3710 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3711 #if UINT_MAX >= SIZE_MAX
3712 if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) {
3713 /* Restore actual size of memory in m_nsAtts */
3714 parser->m_nsAttsPower = oldNsAttsPower;
3715 return XML_ERROR_NO_MEMORY;
3716 }
3717 #endif
3718
3719 temp = (NS_ATT *)REALLOC(parser, parser->m_nsAtts,
3720 nsAttsSize * sizeof(NS_ATT));
3721 if (! temp) {
3722 /* Restore actual size of memory in m_nsAtts */
3723 parser->m_nsAttsPower = oldNsAttsPower;
3724 return XML_ERROR_NO_MEMORY;
3725 }
3726 parser->m_nsAtts = temp;
3727 version = 0; /* force re-initialization of m_nsAtts hash table */
3728 }
3729 /* using a version flag saves us from initializing m_nsAtts every time */
3730 if (! version) { /* initialize version flags when version wraps around */
3731 version = INIT_ATTS_VERSION;
3732 for (j = nsAttsSize; j != 0;)
3733 parser->m_nsAtts[--j].version = version;
3734 }
3735 parser->m_nsAttsVersion = --version;
3736
3737 /* expand prefixed names and check for duplicates */
3738 for (; i < attIndex; i += 2) {
3739 const XML_Char *s = appAtts[i];
3740 if (s[-1] == 2) { /* prefixed */
3741 ATTRIBUTE_ID *id;
3742 const BINDING *b;
3743 unsigned long uriHash;
3744 struct siphash sip_state;
3745 struct sipkey sip_key;
3746
3747 copy_salt_to_sipkey(parser, &sip_key);
3748 sip24_init(&sip_state, &sip_key);
3749
3750 ((XML_Char *)s)[-1] = 0; /* clear flag */
3751 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
3752 if (! id || ! id->prefix) {
3753 /* This code is walking through the appAtts array, dealing
3754 * with (in this case) a prefixed attribute name. To be in
3755 * the array, the attribute must have already been bound, so
3756 * has to have passed through the hash table lookup once
3757 * already. That implies that an entry for it already
3758 * exists, so the lookup above will return a pointer to
3759 * already allocated memory. There is no opportunity for
3760 * the allocator to fail, so the condition above cannot be
3761 * fulfilled.
3762 *
3763 * Since it is difficult to be certain that the above
3764 * analysis is complete, we retain the test and merely
3765 * remove the code from coverage tests.
3766 */
3767 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
3768 }
3769 b = id->prefix->binding;
3770 if (! b)
3771 return XML_ERROR_UNBOUND_PREFIX;
3772
3773 for (j = 0; j < b->uriLen; j++) {
3774 const XML_Char c = b->uri[j];
3775 if (! poolAppendChar(&parser->m_tempPool, c))
3776 return XML_ERROR_NO_MEMORY;
3777 }
3778
3779 sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char));
3780
3781 while (*s++ != XML_T(ASCII_COLON))
3782 ;
3783
3784 sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char));
3785
3786 do { /* copies null terminator */
3787 if (! poolAppendChar(&parser->m_tempPool, *s))
3788 return XML_ERROR_NO_MEMORY;
3789 } while (*s++);
3790
3791 uriHash = (unsigned long)sip24_final(&sip_state);
3792
3793 { /* Check hash table for duplicate of expanded name (uriName).
3794 Derived from code in lookup(parser, HASH_TABLE *table, ...).
3795 */
3796 unsigned char step = 0;
3797 unsigned long mask = nsAttsSize - 1;
3798 j = uriHash & mask; /* index into hash table */
3799 while (parser->m_nsAtts[j].version == version) {
3800 /* for speed we compare stored hash values first */
3801 if (uriHash == parser->m_nsAtts[j].hash) {
3802 const XML_Char *s1 = poolStart(&parser->m_tempPool);
3803 const XML_Char *s2 = parser->m_nsAtts[j].uriName;
3804 /* s1 is null terminated, but not s2 */
3805 for (; *s1 == *s2 && *s1 != 0; s1++, s2++)
3806 ;
3807 if (*s1 == 0)
3808 return XML_ERROR_DUPLICATE_ATTRIBUTE;
3809 }
3810 if (! step)
3811 step = PROBE_STEP(uriHash, mask, parser->m_nsAttsPower);
3812 j < step ? (j += nsAttsSize - step) : (j -= step);
3813 }
3814 }
3815
3816 if (parser->m_ns_triplets) { /* append namespace separator and prefix */
3817 parser->m_tempPool.ptr[-1] = parser->m_namespaceSeparator;
3818 s = b->prefix->name;
3819 do {
3820 if (! poolAppendChar(&parser->m_tempPool, *s))
3821 return XML_ERROR_NO_MEMORY;
3822 } while (*s++);
3823 }
3824
3825 /* store expanded name in attribute list */
3826 s = poolStart(&parser->m_tempPool);
3827 poolFinish(&parser->m_tempPool);
3828 appAtts[i] = s;
3829
3830 /* fill empty slot with new version, uriName and hash value */
3831 parser->m_nsAtts[j].version = version;
3832 parser->m_nsAtts[j].hash = uriHash;
3833 parser->m_nsAtts[j].uriName = s;
3834
3835 if (! --nPrefixes) {
3836 i += 2;
3837 break;
3838 }
3839 } else /* not prefixed */
3840 ((XML_Char *)s)[-1] = 0; /* clear flag */
3841 }
3842 }
3843 /* clear flags for the remaining attributes */
3844 for (; i < attIndex; i += 2)
3845 ((XML_Char *)(appAtts[i]))[-1] = 0;
3846 for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
3847 binding->attId->name[-1] = 0;
3848
3849 if (! parser->m_ns)
3850 return XML_ERROR_NONE;
3851
3852 /* expand the element type name */
3853 if (elementType->prefix) {
3854 binding = elementType->prefix->binding;
3855 if (! binding)
3856 return XML_ERROR_UNBOUND_PREFIX;
3857 localPart = tagNamePtr->str;
3858 while (*localPart++ != XML_T(ASCII_COLON))
3859 ;
3860 } else if (dtd->defaultPrefix.binding) {
3861 binding = dtd->defaultPrefix.binding;
3862 localPart = tagNamePtr->str;
3863 } else
3864 return XML_ERROR_NONE;
3865 prefixLen = 0;
3866 if (parser->m_ns_triplets && binding->prefix->name) {
3867 for (; binding->prefix->name[prefixLen++];)
3868 ; /* prefixLen includes null terminator */
3869 }
3870 tagNamePtr->localPart = localPart;
3871 tagNamePtr->uriLen = binding->uriLen;
3872 tagNamePtr->prefix = binding->prefix->name;
3873 tagNamePtr->prefixLen = prefixLen;
3874 for (i = 0; localPart[i++];)
3875 ; /* i includes null terminator */
3876
3877 /* Detect and prevent integer overflow */
3878 if (binding->uriLen > INT_MAX - prefixLen
3879 || i > INT_MAX - (binding->uriLen + prefixLen)) {
3880 return XML_ERROR_NO_MEMORY;
3881 }
3882
3883 n = i + binding->uriLen + prefixLen;
3884 if (n > binding->uriAlloc) {
3885 TAG *p;
3886
3887 /* Detect and prevent integer overflow */
3888 if (n > INT_MAX - EXPAND_SPARE) {
3889 return XML_ERROR_NO_MEMORY;
3890 }
3891 /* Detect and prevent integer overflow.
3892 * The preprocessor guard addresses the "always false" warning
3893 * from -Wtype-limits on platforms where
3894 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
3895 #if UINT_MAX >= SIZE_MAX
3896 if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
3897 return XML_ERROR_NO_MEMORY;
3898 }
3899 #endif
3900
3901 uri = (XML_Char *)MALLOC(parser, (n + EXPAND_SPARE) * sizeof(XML_Char));
3902 if (! uri)
3903 return XML_ERROR_NO_MEMORY;
3904 binding->uriAlloc = n + EXPAND_SPARE;
3905 memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char));
3906 for (p = parser->m_tagStack; p; p = p->parent)
3907 if (p->name.str == binding->uri)
3908 p->name.str = uri;
3909 FREE(parser, binding->uri);
3910 binding->uri = uri;
3911 }
3912 /* if m_namespaceSeparator != '\0' then uri includes it already */
3913 uri = binding->uri + binding->uriLen;
3914 memcpy(uri, localPart, i * sizeof(XML_Char));
3915 /* we always have a namespace separator between localPart and prefix */
3916 if (prefixLen) {
3917 uri += i - 1;
3918 *uri = parser->m_namespaceSeparator; /* replace null terminator */
3919 memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char));
3920 }
3921 tagNamePtr->str = binding->uri;
3922 return XML_ERROR_NONE;
3923 }
3924
3925 static XML_Bool
3926 is_rfc3986_uri_char(XML_Char candidate) {
3927 // For the RFC 3986 ANBF grammar see
3928 // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
3929
3930 switch (candidate) {
3931 // From rule "ALPHA" (uppercase half)
3932 case 'A':
3933 case 'B':
3934 case 'C':
3935 case 'D':
3936 case 'E':
3937 case 'F':
3938 case 'G':
3939 case 'H':
3940 case 'I':
3941 case 'J':
3942 case 'K':
3943 case 'L':
3944 case 'M':
3945 case 'N':
3946 case 'O':
3947 case 'P':
3948 case 'Q':
3949 case 'R':
3950 case 'S':
3951 case 'T':
3952 case 'U':
3953 case 'V':
3954 case 'W':
3955 case 'X':
3956 case 'Y':
3957 case 'Z':
3958
3959 // From rule "ALPHA" (lowercase half)
3960 case 'a':
3961 case 'b':
3962 case 'c':
3963 case 'd':
3964 case 'e':
3965 case 'f':
3966 case 'g':
3967 case 'h':
3968 case 'i':
3969 case 'j':
3970 case 'k':
3971 case 'l':
3972 case 'm':
3973 case 'n':
3974 case 'o':
3975 case 'p':
3976 case 'q':
3977 case 'r':
3978 case 's':
3979 case 't':
3980 case 'u':
3981 case 'v':
3982 case 'w':
3983 case 'x':
3984 case 'y':
3985 case 'z':
3986
3987 // From rule "DIGIT"
3988 case '0':
3989 case '1':
3990 case '2':
3991 case '3':
3992 case '4':
3993 case '5':
3994 case '6':
3995 case '7':
3996 case '8':
3997 case '9':
3998
3999 // From rule "pct-encoded"
4000 case '%':
4001
4002 // From rule "unreserved"
4003 case '-':
4004 case '.':
4005 case '_':
4006 case '~':
4007
4008 // From rule "gen-delims"
4009 case ':':
4010 case '/':
4011 case '?':
4012 case '#':
4013 case '[':
4014 case ']':
4015 case '@':
4016
4017 // From rule "sub-delims"
4018 case '!':
4019 case '$':
4020 case '&':
4021 case '\'':
4022 case '(':
4023 case ')':
4024 case '*':
4025 case '+':
4026 case ',':
4027 case ';':
4028 case '=':
4029 return XML_TRUE;
4030
4031 default:
4032 return XML_FALSE;
4033 }
4034 }
4035
4036 /* addBinding() overwrites the value of prefix->binding without checking.
4037 Therefore one must keep track of the old value outside of addBinding().
4038 */
4039 static enum XML_Error
4040 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
4041 const XML_Char *uri, BINDING **bindingsPtr) {
4042 // "http://www.w3.org/XML/1998/namespace"
4043 static const XML_Char xmlNamespace[]
4044 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
4045 ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
4046 ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o,
4047 ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M,
4048 ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9,
4049 ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m,
4050 ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
4051 ASCII_e, '\0'};
4052 static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
4053 // "http://www.w3.org/2000/xmlns/"
4054 static const XML_Char xmlnsNamespace[]
4055 = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
4056 ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
4057 ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH,
4058 ASCII_2, ASCII_0, ASCII_0, ASCII_0, ASCII_SLASH, ASCII_x,
4059 ASCII_m, ASCII_l, ASCII_n, ASCII_s, ASCII_SLASH, '\0'};
4060 static const int xmlnsLen
4061 = (int)sizeof(xmlnsNamespace) / sizeof(XML_Char) - 1;
4062
4063 XML_Bool mustBeXML = XML_FALSE;
4064 XML_Bool isXML = XML_TRUE;
4065 XML_Bool isXMLNS = XML_TRUE;
4066
4067 BINDING *b;
4068 int len;
4069
4070 /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */
4071 if (*uri == XML_T('\0') && prefix->name)
4072 return XML_ERROR_UNDECLARING_PREFIX;
4073
4074 if (prefix->name && prefix->name[0] == XML_T(ASCII_x)
4075 && prefix->name[1] == XML_T(ASCII_m)
4076 && prefix->name[2] == XML_T(ASCII_l)) {
4077 /* Not allowed to bind xmlns */
4078 if (prefix->name[3] == XML_T(ASCII_n) && prefix->name[4] == XML_T(ASCII_s)
4079 && prefix->name[5] == XML_T('\0'))
4080 return XML_ERROR_RESERVED_PREFIX_XMLNS;
4081
4082 if (prefix->name[3] == XML_T('\0'))
4083 mustBeXML = XML_TRUE;
4084 }
4085
4086 for (len = 0; uri[len]; len++) {
4087 if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
4088 isXML = XML_FALSE;
4089
4090 if (! mustBeXML && isXMLNS
4091 && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
4092 isXMLNS = XML_FALSE;
4093
4094 // NOTE: While Expat does not validate namespace URIs against RFC 3986
4095 // today (and is not REQUIRED to do so with regard to the XML 1.0
4096 // namespaces specification) we have to at least make sure, that
4097 // the application on top of Expat (that is likely splitting expanded
4098 // element names ("qualified names") of form
4099 // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
4100 // in its element handler code) cannot be confused by an attacker
4101 // putting additional namespace separator characters into namespace
4102 // declarations. That would be ambiguous and not to be expected.
4103 //
4104 // While the HTML API docs of function XML_ParserCreateNS have been
4105 // advising against use of a namespace separator character that can
4106 // appear in a URI for >20 years now, some widespread applications
4107 // are using URI characters (':' (colon) in particular) for a
4108 // namespace separator, in practice. To keep these applications
4109 // functional, we only reject namespaces URIs containing the
4110 // application-chosen namespace separator if the chosen separator
4111 // is a non-URI character with regard to RFC 3986.
4112 if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
4113 && ! is_rfc3986_uri_char(uri[len])) {
4114 return XML_ERROR_SYNTAX;
4115 }
4116 }
4117 isXML = isXML && len == xmlLen;
4118 isXMLNS = isXMLNS && len == xmlnsLen;
4119
4120 if (mustBeXML != isXML)
4121 return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML
4122 : XML_ERROR_RESERVED_NAMESPACE_URI;
4123
4124 if (isXMLNS)
4125 return XML_ERROR_RESERVED_NAMESPACE_URI;
4126
4127 if (parser->m_namespaceSeparator)
4128 len++;
4129 if (parser->m_freeBindingList) {
4130 b = parser->m_freeBindingList;
4131 if (len > b->uriAlloc) {
4132 /* Detect and prevent integer overflow */
4133 if (len > INT_MAX - EXPAND_SPARE) {
4134 return XML_ERROR_NO_MEMORY;
4135 }
4136
4137 /* Detect and prevent integer overflow.
4138 * The preprocessor guard addresses the "always false" warning
4139 * from -Wtype-limits on platforms where
4140 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4141 #if UINT_MAX >= SIZE_MAX
4142 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4143 return XML_ERROR_NO_MEMORY;
4144 }
4145 #endif
4146
4147 XML_Char *temp = (XML_Char *)REALLOC(
4148 parser, b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
4149 if (temp == NULL)
4150 return XML_ERROR_NO_MEMORY;
4151 b->uri = temp;
4152 b->uriAlloc = len + EXPAND_SPARE;
4153 }
4154 parser->m_freeBindingList = b->nextTagBinding;
4155 } else {
4156 b = (BINDING *)MALLOC(parser, sizeof(BINDING));
4157 if (! b)
4158 return XML_ERROR_NO_MEMORY;
4159
4160 /* Detect and prevent integer overflow */
4161 if (len > INT_MAX - EXPAND_SPARE) {
4162 return XML_ERROR_NO_MEMORY;
4163 }
4164 /* Detect and prevent integer overflow.
4165 * The preprocessor guard addresses the "always false" warning
4166 * from -Wtype-limits on platforms where
4167 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
4168 #if UINT_MAX >= SIZE_MAX
4169 if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) {
4170 return XML_ERROR_NO_MEMORY;
4171 }
4172 #endif
4173
4174 b->uri
4175 = (XML_Char *)MALLOC(parser, sizeof(XML_Char) * (len + EXPAND_SPARE));
4176 if (! b->uri) {
4177 FREE(parser, b);
4178 return XML_ERROR_NO_MEMORY;
4179 }
4180 b->uriAlloc = len + EXPAND_SPARE;
4181 }
4182 b->uriLen = len;
4183 memcpy(b->uri, uri, len * sizeof(XML_Char));
4184 if (parser->m_namespaceSeparator)
4185 b->uri[len - 1] = parser->m_namespaceSeparator;
4186 b->prefix = prefix;
4187 b->attId = attId;
4188 b->prevPrefixBinding = prefix->binding;
4189 /* NULL binding when default namespace undeclared */
4190 if (*uri == XML_T('\0') && prefix == &parser->m_dtd->defaultPrefix)
4191 prefix->binding = NULL;
4192 else
4193 prefix->binding = b;
4194 b->nextTagBinding = *bindingsPtr;
4195 *bindingsPtr = b;
4196 /* if attId == NULL then we are not starting a namespace scope */
4197 if (attId && parser->m_startNamespaceDeclHandler)
4198 parser->m_startNamespaceDeclHandler(parser->m_handlerArg, prefix->name,
4199 prefix->binding ? uri : 0);
4200 return XML_ERROR_NONE;
4201 }
4202
4203 /* The idea here is to avoid using stack for each CDATA section when
4204 the whole file is parsed with one call.
4205 */
4206 static enum XML_Error PTRCALL
4207 cdataSectionProcessor(XML_Parser parser, const char *start, const char *end,
4208 const char **endPtr) {
4209 enum XML_Error result = doCdataSection(
4210 parser, parser->m_encoding, &start, end, endPtr,
4211 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_ACCOUNT_DIRECT);
4212 if (result != XML_ERROR_NONE)
4213 return result;
4214 if (start) {
4215 if (parser->m_parentParser) { /* we are parsing an external entity */
4216 parser->m_processor = externalEntityContentProcessor;
4217 return externalEntityContentProcessor(parser, start, end, endPtr);
4218 } else {
4219 parser->m_processor = contentProcessor;
4220 return contentProcessor(parser, start, end, endPtr);
4221 }
4222 }
4223 return result;
4224 }
4225
4226 /* startPtr gets set to non-null if the section is closed, and to null if
4227 the section is not yet closed.
4228 */
4229 static enum XML_Error
4230 doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4231 const char *end, const char **nextPtr, XML_Bool haveMore,
4232 enum XML_Account account) {
4233 const char *s = *startPtr;
4234 const char **eventPP;
4235 const char **eventEndPP;
4236 if (enc == parser->m_encoding) {
4237 eventPP = &parser->m_eventPtr;
4238 *eventPP = s;
4239 eventEndPP = &parser->m_eventEndPtr;
4240 } else {
4241 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4242 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4243 }
4244 *eventPP = s;
4245 *startPtr = NULL;
4246
4247 for (;;) {
4248 const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4249 int tok = XmlCdataSectionTok(enc, s, end, &next);
4250 #if XML_GE == 1
4251 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4252 accountingOnAbort(parser);
4253 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4254 }
4255 #else
4256 UNUSED_P(account);
4257 #endif
4258 *eventEndPP = next;
4259 switch (tok) {
4260 case XML_TOK_CDATA_SECT_CLOSE:
4261 if (parser->m_endCdataSectionHandler)
4262 parser->m_endCdataSectionHandler(parser->m_handlerArg);
4263 /* BEGIN disabled code */
4264 /* see comment under XML_TOK_CDATA_SECT_OPEN */
4265 else if ((0) && parser->m_characterDataHandler)
4266 parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4267 0);
4268 /* END disabled code */
4269 else if (parser->m_defaultHandler)
4270 reportDefault(parser, enc, s, next);
4271 *startPtr = next;
4272 *nextPtr = next;
4273 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4274 return XML_ERROR_ABORTED;
4275 else
4276 return XML_ERROR_NONE;
4277 case XML_TOK_DATA_NEWLINE:
4278 if (parser->m_characterDataHandler) {
4279 XML_Char c = 0xA;
4280 parser->m_characterDataHandler(parser->m_handlerArg, &c, 1);
4281 } else if (parser->m_defaultHandler)
4282 reportDefault(parser, enc, s, next);
4283 break;
4284 case XML_TOK_DATA_CHARS: {
4285 XML_CharacterDataHandler charDataHandler = parser->m_characterDataHandler;
4286 if (charDataHandler) {
4287 if (MUST_CONVERT(enc, s)) {
4288 for (;;) {
4289 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
4290 const enum XML_Convert_Result convert_res = XmlConvert(
4291 enc, &s, next, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
4292 *eventEndPP = next;
4293 charDataHandler(parser->m_handlerArg, parser->m_dataBuf,
4294 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
4295 if ((convert_res == XML_CONVERT_COMPLETED)
4296 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
4297 break;
4298 *eventPP = s;
4299 }
4300 } else
4301 charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
4302 (int)((const XML_Char *)next - (const XML_Char *)s));
4303 } else if (parser->m_defaultHandler)
4304 reportDefault(parser, enc, s, next);
4305 } break;
4306 case XML_TOK_INVALID:
4307 *eventPP = next;
4308 return XML_ERROR_INVALID_TOKEN;
4309 case XML_TOK_PARTIAL_CHAR:
4310 if (haveMore) {
4311 *nextPtr = s;
4312 return XML_ERROR_NONE;
4313 }
4314 return XML_ERROR_PARTIAL_CHAR;
4315 case XML_TOK_PARTIAL:
4316 case XML_TOK_NONE:
4317 if (haveMore) {
4318 *nextPtr = s;
4319 return XML_ERROR_NONE;
4320 }
4321 return XML_ERROR_UNCLOSED_CDATA_SECTION;
4322 default:
4323 /* Every token returned by XmlCdataSectionTok() has its own
4324 * explicit case, so this default case will never be executed.
4325 * We retain it as a safety net and exclude it from the coverage
4326 * statistics.
4327 *
4328 * LCOV_EXCL_START
4329 */
4330 *eventPP = next;
4331 return XML_ERROR_UNEXPECTED_STATE;
4332 /* LCOV_EXCL_STOP */
4333 }
4334
4335 *eventPP = s = next;
4336 switch (parser->m_parsingStatus.parsing) {
4337 case XML_SUSPENDED:
4338 *nextPtr = next;
4339 return XML_ERROR_NONE;
4340 case XML_FINISHED:
4341 return XML_ERROR_ABORTED;
4342 case XML_PARSING:
4343 if (parser->m_reenter) {
4344 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
4345 }
4346 /* Fall through */
4347 default:;
4348 }
4349 }
4350 /* not reached */
4351 }
4352
4353 #ifdef XML_DTD
4354
4355 /* The idea here is to avoid using stack for each IGNORE section when
4356 the whole file is parsed with one call.
4357 */
4358 static enum XML_Error PTRCALL
4359 ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end,
4360 const char **endPtr) {
4361 enum XML_Error result
4362 = doIgnoreSection(parser, parser->m_encoding, &start, end, endPtr,
4363 (XML_Bool)! parser->m_parsingStatus.finalBuffer);
4364 if (result != XML_ERROR_NONE)
4365 return result;
4366 if (start) {
4367 parser->m_processor = prologProcessor;
4368 return prologProcessor(parser, start, end, endPtr);
4369 }
4370 return result;
4371 }
4372
4373 /* startPtr gets set to non-null is the section is closed, and to null
4374 if the section is not yet closed.
4375 */
4376 static enum XML_Error
4377 doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
4378 const char *end, const char **nextPtr, XML_Bool haveMore) {
4379 const char *next = *startPtr; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
4380 int tok;
4381 const char *s = *startPtr;
4382 const char **eventPP;
4383 const char **eventEndPP;
4384 if (enc == parser->m_encoding) {
4385 eventPP = &parser->m_eventPtr;
4386 *eventPP = s;
4387 eventEndPP = &parser->m_eventEndPtr;
4388 } else {
4389 /* It's not entirely clear, but it seems the following two lines
4390 * of code cannot be executed. The only occasions on which 'enc'
4391 * is not 'encoding' are when this function is called
4392 * from the internal entity processing, and IGNORE sections are an
4393 * error in internal entities.
4394 *
4395 * Since it really isn't clear that this is true, we keep the code
4396 * and just remove it from our coverage tests.
4397 *
4398 * LCOV_EXCL_START
4399 */
4400 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4401 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4402 /* LCOV_EXCL_STOP */
4403 }
4404 *eventPP = s;
4405 *startPtr = NULL;
4406 tok = XmlIgnoreSectionTok(enc, s, end, &next);
4407 # if XML_GE == 1
4408 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4409 XML_ACCOUNT_DIRECT)) {
4410 accountingOnAbort(parser);
4411 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4412 }
4413 # endif
4414 *eventEndPP = next;
4415 switch (tok) {
4416 case XML_TOK_IGNORE_SECT:
4417 if (parser->m_defaultHandler)
4418 reportDefault(parser, enc, s, next);
4419 *startPtr = next;
4420 *nextPtr = next;
4421 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4422 return XML_ERROR_ABORTED;
4423 else
4424 return XML_ERROR_NONE;
4425 case XML_TOK_INVALID:
4426 *eventPP = next;
4427 return XML_ERROR_INVALID_TOKEN;
4428 case XML_TOK_PARTIAL_CHAR:
4429 if (haveMore) {
4430 *nextPtr = s;
4431 return XML_ERROR_NONE;
4432 }
4433 return XML_ERROR_PARTIAL_CHAR;
4434 case XML_TOK_PARTIAL:
4435 case XML_TOK_NONE:
4436 if (haveMore) {
4437 *nextPtr = s;
4438 return XML_ERROR_NONE;
4439 }
4440 return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
4441 default:
4442 /* All of the tokens that XmlIgnoreSectionTok() returns have
4443 * explicit cases to handle them, so this default case is never
4444 * executed. We keep it as a safety net anyway, and remove it
4445 * from our test coverage statistics.
4446 *
4447 * LCOV_EXCL_START
4448 */
4449 *eventPP = next;
4450 return XML_ERROR_UNEXPECTED_STATE;
4451 /* LCOV_EXCL_STOP */
4452 }
4453 /* not reached */
4454 }
4455
4456 #endif /* XML_DTD */
4457
4458 static enum XML_Error
4459 initializeEncoding(XML_Parser parser) {
4460 const char *s;
4461 #ifdef XML_UNICODE
4462 char encodingBuf[128];
4463 /* See comments about `protocolEncodingName` in parserInit() */
4464 if (! parser->m_protocolEncodingName)
4465 s = NULL;
4466 else {
4467 int i;
4468 for (i = 0; parser->m_protocolEncodingName[i]; i++) {
4469 if (i == sizeof(encodingBuf) - 1
4470 || (parser->m_protocolEncodingName[i] & ~0x7f) != 0) {
4471 encodingBuf[0] = '\0';
4472 break;
4473 }
4474 encodingBuf[i] = (char)parser->m_protocolEncodingName[i];
4475 }
4476 encodingBuf[i] = '\0';
4477 s = encodingBuf;
4478 }
4479 #else
4480 s = parser->m_protocolEncodingName;
4481 #endif
4482 if ((parser->m_ns ? XmlInitEncodingNS : XmlInitEncoding)(
4483 &parser->m_initEncoding, &parser->m_encoding, s))
4484 return XML_ERROR_NONE;
4485 return handleUnknownEncoding(parser, parser->m_protocolEncodingName);
4486 }
4487
4488 static enum XML_Error
4489 processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
4490 const char *next) {
4491 const char *encodingName = NULL;
4492 const XML_Char *storedEncName = NULL;
4493 const ENCODING *newEncoding = NULL;
4494 const char *version = NULL;
4495 const char *versionend = NULL;
4496 const XML_Char *storedversion = NULL;
4497 int standalone = -1;
4498
4499 #if XML_GE == 1
4500 if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
4501 XML_ACCOUNT_DIRECT)) {
4502 accountingOnAbort(parser);
4503 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4504 }
4505 #endif
4506
4507 if (! (parser->m_ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(
4508 isGeneralTextEntity, parser->m_encoding, s, next, &parser->m_eventPtr,
4509 &version, &versionend, &encodingName, &newEncoding, &standalone)) {
4510 if (isGeneralTextEntity)
4511 return XML_ERROR_TEXT_DECL;
4512 else
4513 return XML_ERROR_XML_DECL;
4514 }
4515 if (! isGeneralTextEntity && standalone == 1) {
4516 parser->m_dtd->standalone = XML_TRUE;
4517 #ifdef XML_DTD
4518 if (parser->m_paramEntityParsing
4519 == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
4520 parser->m_paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
4521 #endif /* XML_DTD */
4522 }
4523 if (parser->m_xmlDeclHandler) {
4524 if (encodingName != NULL) {
4525 storedEncName = poolStoreString(
4526 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4527 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4528 if (! storedEncName)
4529 return XML_ERROR_NO_MEMORY;
4530 poolFinish(&parser->m_temp2Pool);
4531 }
4532 if (version) {
4533 storedversion
4534 = poolStoreString(&parser->m_temp2Pool, parser->m_encoding, version,
4535 versionend - parser->m_encoding->minBytesPerChar);
4536 if (! storedversion)
4537 return XML_ERROR_NO_MEMORY;
4538 }
4539 parser->m_xmlDeclHandler(parser->m_handlerArg, storedversion, storedEncName,
4540 standalone);
4541 } else if (parser->m_defaultHandler)
4542 reportDefault(parser, parser->m_encoding, s, next);
4543 if (parser->m_protocolEncodingName == NULL) {
4544 if (newEncoding) {
4545 /* Check that the specified encoding does not conflict with what
4546 * the parser has already deduced. Do we have the same number
4547 * of bytes in the smallest representation of a character? If
4548 * this is UTF-16, is it the same endianness?
4549 */
4550 if (newEncoding->minBytesPerChar != parser->m_encoding->minBytesPerChar
4551 || (newEncoding->minBytesPerChar == 2
4552 && newEncoding != parser->m_encoding)) {
4553 parser->m_eventPtr = encodingName;
4554 return XML_ERROR_INCORRECT_ENCODING;
4555 }
4556 parser->m_encoding = newEncoding;
4557 } else if (encodingName) {
4558 enum XML_Error result;
4559 if (! storedEncName) {
4560 storedEncName = poolStoreString(
4561 &parser->m_temp2Pool, parser->m_encoding, encodingName,
4562 encodingName + XmlNameLength(parser->m_encoding, encodingName));
4563 if (! storedEncName)
4564 return XML_ERROR_NO_MEMORY;
4565 }
4566 result = handleUnknownEncoding(parser, storedEncName);
4567 poolClear(&parser->m_temp2Pool);
4568 if (result == XML_ERROR_UNKNOWN_ENCODING)
4569 parser->m_eventPtr = encodingName;
4570 return result;
4571 }
4572 }
4573
4574 if (storedEncName || storedversion)
4575 poolClear(&parser->m_temp2Pool);
4576
4577 return XML_ERROR_NONE;
4578 }
4579
4580 static enum XML_Error
4581 handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) {
4582 if (parser->m_unknownEncodingHandler) {
4583 XML_Encoding info;
4584 int i;
4585 for (i = 0; i < 256; i++)
4586 info.map[i] = -1;
4587 info.convert = NULL;
4588 info.data = NULL;
4589 info.release = NULL;
4590 if (parser->m_unknownEncodingHandler(parser->m_unknownEncodingHandlerData,
4591 encodingName, &info)) {
4592 ENCODING *enc;
4593 parser->m_unknownEncodingMem = MALLOC(parser, XmlSizeOfUnknownEncoding());
4594 if (! parser->m_unknownEncodingMem) {
4595 if (info.release)
4596 info.release(info.data);
4597 return XML_ERROR_NO_MEMORY;
4598 }
4599 enc = (parser->m_ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(
4600 parser->m_unknownEncodingMem, info.map, info.convert, info.data);
4601 if (enc) {
4602 parser->m_unknownEncodingData = info.data;
4603 parser->m_unknownEncodingRelease = info.release;
4604 parser->m_encoding = enc;
4605 return XML_ERROR_NONE;
4606 }
4607 }
4608 if (info.release != NULL)
4609 info.release(info.data);
4610 }
4611 return XML_ERROR_UNKNOWN_ENCODING;
4612 }
4613
4614 static enum XML_Error PTRCALL
4615 prologInitProcessor(XML_Parser parser, const char *s, const char *end,
4616 const char **nextPtr) {
4617 enum XML_Error result = initializeEncoding(parser);
4618 if (result != XML_ERROR_NONE)
4619 return result;
4620 parser->m_processor = prologProcessor;
4621 return prologProcessor(parser, s, end, nextPtr);
4622 }
4623
4624 #ifdef XML_DTD
4625
4626 static enum XML_Error PTRCALL
4627 externalParEntInitProcessor(XML_Parser parser, const char *s, const char *end,
4628 const char **nextPtr) {
4629 enum XML_Error result = initializeEncoding(parser);
4630 if (result != XML_ERROR_NONE)
4631 return result;
4632
4633 /* we know now that XML_Parse(Buffer) has been called,
4634 so we consider the external parameter entity read */
4635 parser->m_dtd->paramEntityRead = XML_TRUE;
4636
4637 if (parser->m_prologState.inEntityValue) {
4638 parser->m_processor = entityValueInitProcessor;
4639 return entityValueInitProcessor(parser, s, end, nextPtr);
4640 } else {
4641 parser->m_processor = externalParEntProcessor;
4642 return externalParEntProcessor(parser, s, end, nextPtr);
4643 }
4644 }
4645
4646 static enum XML_Error PTRCALL
4647 entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
4648 const char **nextPtr) {
4649 int tok;
4650 const char *start = s;
4651 const char *next = start;
4652 parser->m_eventPtr = start;
4653
4654 for (;;) {
4655 tok = XmlPrologTok(parser->m_encoding, start, end, &next);
4656 /* Note: Except for XML_TOK_BOM below, these bytes are accounted later in:
4657 - storeEntityValue
4658 - processXmlDecl
4659 */
4660 parser->m_eventEndPtr = next;
4661 if (tok <= 0) {
4662 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4663 *nextPtr = s;
4664 return XML_ERROR_NONE;
4665 }
4666 switch (tok) {
4667 case XML_TOK_INVALID:
4668 return XML_ERROR_INVALID_TOKEN;
4669 case XML_TOK_PARTIAL:
4670 return XML_ERROR_UNCLOSED_TOKEN;
4671 case XML_TOK_PARTIAL_CHAR:
4672 return XML_ERROR_PARTIAL_CHAR;
4673 case XML_TOK_NONE: /* start == end */
4674 default:
4675 break;
4676 }
4677 /* found end of entity value - can store it now */
4678 return storeEntityValue(parser, parser->m_encoding, s, end,
4679 XML_ACCOUNT_DIRECT, NULL);
4680 } else if (tok == XML_TOK_XML_DECL) {
4681 enum XML_Error result;
4682 result = processXmlDecl(parser, 0, start, next);
4683 if (result != XML_ERROR_NONE)
4684 return result;
4685 /* At this point, m_parsingStatus.parsing cannot be XML_SUSPENDED. For
4686 * that to happen, a parameter entity parsing handler must have attempted
4687 * to suspend the parser, which fails and raises an error. The parser can
4688 * be aborted, but can't be suspended.
4689 */
4690 if (parser->m_parsingStatus.parsing == XML_FINISHED)
4691 return XML_ERROR_ABORTED;
4692 *nextPtr = next;
4693 /* stop scanning for text declaration - we found one */
4694 parser->m_processor = entityValueProcessor;
4695 return entityValueProcessor(parser, next, end, nextPtr);
4696 }
4697 /* XmlPrologTok has now set the encoding based on the BOM it found, and we
4698 must move s and nextPtr forward to consume the BOM.
4699
4700 If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
4701 would leave the BOM in the buffer and return. On the next call to this
4702 function, our XmlPrologTok call would return XML_TOK_INVALID, since it
4703 is not valid to have multiple BOMs.
4704 */
4705 else if (tok == XML_TOK_BOM) {
4706 # if XML_GE == 1
4707 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4708 XML_ACCOUNT_DIRECT)) {
4709 accountingOnAbort(parser);
4710 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4711 }
4712 # endif
4713
4714 *nextPtr = next;
4715 s = next;
4716 }
4717 /* If we get this token, we have the start of what might be a
4718 normal tag, but not a declaration (i.e. it doesn't begin with
4719 "<!"). In a DTD context, that isn't legal.
4720 */
4721 else if (tok == XML_TOK_INSTANCE_START) {
4722 *nextPtr = next;
4723 return XML_ERROR_SYNTAX;
4724 }
4725 start = next;
4726 parser->m_eventPtr = start;
4727 }
4728 }
4729
4730 static enum XML_Error PTRCALL
4731 externalParEntProcessor(XML_Parser parser, const char *s, const char *end,
4732 const char **nextPtr) {
4733 const char *next = s;
4734 int tok;
4735
4736 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4737 if (tok <= 0) {
4738 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4739 *nextPtr = s;
4740 return XML_ERROR_NONE;
4741 }
4742 switch (tok) {
4743 case XML_TOK_INVALID:
4744 return XML_ERROR_INVALID_TOKEN;
4745 case XML_TOK_PARTIAL:
4746 return XML_ERROR_UNCLOSED_TOKEN;
4747 case XML_TOK_PARTIAL_CHAR:
4748 return XML_ERROR_PARTIAL_CHAR;
4749 case XML_TOK_NONE: /* start == end */
4750 default:
4751 break;
4752 }
4753 }
4754 /* This would cause the next stage, i.e. doProlog to be passed XML_TOK_BOM.
4755 However, when parsing an external subset, doProlog will not accept a BOM
4756 as valid, and report a syntax error, so we have to skip the BOM, and
4757 account for the BOM bytes.
4758 */
4759 else if (tok == XML_TOK_BOM) {
4760 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
4761 XML_ACCOUNT_DIRECT)) {
4762 accountingOnAbort(parser);
4763 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4764 }
4765
4766 s = next;
4767 tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4768 }
4769
4770 parser->m_processor = prologProcessor;
4771 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4772 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4773 XML_ACCOUNT_DIRECT);
4774 }
4775
4776 static enum XML_Error PTRCALL
4777 entityValueProcessor(XML_Parser parser, const char *s, const char *end,
4778 const char **nextPtr) {
4779 const char *start = s;
4780 const char *next = s;
4781 const ENCODING *enc = parser->m_encoding;
4782 int tok;
4783
4784 for (;;) {
4785 tok = XmlPrologTok(enc, start, end, &next);
4786 /* Note: These bytes are accounted later in:
4787 - storeEntityValue
4788 */
4789 if (tok <= 0) {
4790 if (! parser->m_parsingStatus.finalBuffer && tok != XML_TOK_INVALID) {
4791 *nextPtr = s;
4792 return XML_ERROR_NONE;
4793 }
4794 switch (tok) {
4795 case XML_TOK_INVALID:
4796 return XML_ERROR_INVALID_TOKEN;
4797 case XML_TOK_PARTIAL:
4798 return XML_ERROR_UNCLOSED_TOKEN;
4799 case XML_TOK_PARTIAL_CHAR:
4800 return XML_ERROR_PARTIAL_CHAR;
4801 case XML_TOK_NONE: /* start == end */
4802 default:
4803 break;
4804 }
4805 /* found end of entity value - can store it now */
4806 return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL);
4807 }
4808 start = next;
4809 }
4810 }
4811
4812 #endif /* XML_DTD */
4813
4814 static enum XML_Error PTRCALL
4815 prologProcessor(XML_Parser parser, const char *s, const char *end,
4816 const char **nextPtr) {
4817 const char *next = s;
4818 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
4819 return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr,
4820 (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE,
4821 XML_ACCOUNT_DIRECT);
4822 }
4823
4824 static enum XML_Error
4825 doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
4826 int tok, const char *next, const char **nextPtr, XML_Bool haveMore,
4827 XML_Bool allowClosingDoctype, enum XML_Account account) {
4828 #ifdef XML_DTD
4829 static const XML_Char externalSubsetName[] = {ASCII_HASH, '\0'};
4830 #endif /* XML_DTD */
4831 static const XML_Char atypeCDATA[]
4832 = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'};
4833 static const XML_Char atypeID[] = {ASCII_I, ASCII_D, '\0'};
4834 static const XML_Char atypeIDREF[]
4835 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'};
4836 static const XML_Char atypeIDREFS[]
4837 = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'};
4838 static const XML_Char atypeENTITY[]
4839 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'};
4840 static const XML_Char atypeENTITIES[]
4841 = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T,
4842 ASCII_I, ASCII_E, ASCII_S, '\0'};
4843 static const XML_Char atypeNMTOKEN[]
4844 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'};
4845 static const XML_Char atypeNMTOKENS[]
4846 = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K,
4847 ASCII_E, ASCII_N, ASCII_S, '\0'};
4848 static const XML_Char notationPrefix[]
4849 = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T,
4850 ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0'};
4851 static const XML_Char enumValueSep[] = {ASCII_PIPE, '\0'};
4852 static const XML_Char enumValueStart[] = {ASCII_LPAREN, '\0'};
4853
4854 #ifndef XML_DTD
4855 UNUSED_P(account);
4856 #endif
4857
4858 /* save one level of indirection */
4859 DTD *const dtd = parser->m_dtd;
4860
4861 const char **eventPP;
4862 const char **eventEndPP;
4863 enum XML_Content_Quant quant;
4864
4865 if (enc == parser->m_encoding) {
4866 eventPP = &parser->m_eventPtr;
4867 eventEndPP = &parser->m_eventEndPtr;
4868 } else {
4869 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
4870 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
4871 }
4872
4873 for (;;) {
4874 int role;
4875 XML_Bool handleDefault = XML_TRUE;
4876 *eventPP = s;
4877 *eventEndPP = next;
4878 if (tok <= 0) {
4879 if (haveMore && tok != XML_TOK_INVALID) {
4880 *nextPtr = s;
4881 return XML_ERROR_NONE;
4882 }
4883 switch (tok) {
4884 case XML_TOK_INVALID:
4885 *eventPP = next;
4886 return XML_ERROR_INVALID_TOKEN;
4887 case XML_TOK_PARTIAL:
4888 return XML_ERROR_UNCLOSED_TOKEN;
4889 case XML_TOK_PARTIAL_CHAR:
4890 return XML_ERROR_PARTIAL_CHAR;
4891 case -XML_TOK_PROLOG_S:
4892 tok = -tok;
4893 break;
4894 case XML_TOK_NONE:
4895 #ifdef XML_DTD
4896 /* for internal PE NOT referenced between declarations */
4897 if (enc != parser->m_encoding
4898 && ! parser->m_openInternalEntities->betweenDecl) {
4899 *nextPtr = s;
4900 return XML_ERROR_NONE;
4901 }
4902 /* WFC: PE Between Declarations - must check that PE contains
4903 complete markup, not only for external PEs, but also for
4904 internal PEs if the reference occurs between declarations.
4905 */
4906 if (parser->m_isParamEntity || enc != parser->m_encoding) {
4907 if (XmlTokenRole(&parser->m_prologState, XML_TOK_NONE, end, end, enc)
4908 == XML_ROLE_ERROR)
4909 return XML_ERROR_INCOMPLETE_PE;
4910 *nextPtr = s;
4911 return XML_ERROR_NONE;
4912 }
4913 #endif /* XML_DTD */
4914 return XML_ERROR_NO_ELEMENTS;
4915 default:
4916 tok = -tok;
4917 next = end;
4918 break;
4919 }
4920 }
4921 role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
4922 #if XML_GE == 1
4923 switch (role) {
4924 case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
4925 case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
4926 # ifdef XML_DTD
4927 case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
4928 # endif
4929 break;
4930 default:
4931 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
4932 accountingOnAbort(parser);
4933 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
4934 }
4935 }
4936 #endif
4937 switch (role) {
4938 case XML_ROLE_XML_DECL: {
4939 enum XML_Error result = processXmlDecl(parser, 0, s, next);
4940 if (result != XML_ERROR_NONE)
4941 return result;
4942 enc = parser->m_encoding;
4943 handleDefault = XML_FALSE;
4944 } break;
4945 case XML_ROLE_DOCTYPE_NAME:
4946 if (parser->m_startDoctypeDeclHandler) {
4947 parser->m_doctypeName
4948 = poolStoreString(&parser->m_tempPool, enc, s, next);
4949 if (! parser->m_doctypeName)
4950 return XML_ERROR_NO_MEMORY;
4951 poolFinish(&parser->m_tempPool);
4952 parser->m_doctypePubid = NULL;
4953 handleDefault = XML_FALSE;
4954 }
4955 parser->m_doctypeSysid = NULL; /* always initialize to NULL */
4956 break;
4957 case XML_ROLE_DOCTYPE_INTERNAL_SUBSET:
4958 if (parser->m_startDoctypeDeclHandler) {
4959 parser->m_startDoctypeDeclHandler(
4960 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
4961 parser->m_doctypePubid, 1);
4962 parser->m_doctypeName = NULL;
4963 poolClear(&parser->m_tempPool);
4964 handleDefault = XML_FALSE;
4965 }
4966 break;
4967 #ifdef XML_DTD
4968 case XML_ROLE_TEXT_DECL: {
4969 enum XML_Error result = processXmlDecl(parser, 1, s, next);
4970 if (result != XML_ERROR_NONE)
4971 return result;
4972 enc = parser->m_encoding;
4973 handleDefault = XML_FALSE;
4974 } break;
4975 #endif /* XML_DTD */
4976 case XML_ROLE_DOCTYPE_PUBLIC_ID:
4977 #ifdef XML_DTD
4978 parser->m_useForeignDTD = XML_FALSE;
4979 parser->m_declEntity = (ENTITY *)lookup(
4980 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
4981 if (! parser->m_declEntity)
4982 return XML_ERROR_NO_MEMORY;
4983 #endif /* XML_DTD */
4984 dtd->hasParamEntityRefs = XML_TRUE;
4985 if (parser->m_startDoctypeDeclHandler) {
4986 XML_Char *pubId;
4987 if (! XmlIsPublicId(enc, s, next, eventPP))
4988 return XML_ERROR_PUBLICID;
4989 pubId = poolStoreString(&parser->m_tempPool, enc,
4990 s + enc->minBytesPerChar,
4991 next - enc->minBytesPerChar);
4992 if (! pubId)
4993 return XML_ERROR_NO_MEMORY;
4994 normalizePublicId(pubId);
4995 poolFinish(&parser->m_tempPool);
4996 parser->m_doctypePubid = pubId;
4997 handleDefault = XML_FALSE;
4998 goto alreadyChecked;
4999 }
5000 /* fall through */
5001 case XML_ROLE_ENTITY_PUBLIC_ID:
5002 if (! XmlIsPublicId(enc, s, next, eventPP))
5003 return XML_ERROR_PUBLICID;
5004 alreadyChecked:
5005 if (dtd->keepProcessing && parser->m_declEntity) {
5006 XML_Char *tem
5007 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5008 next - enc->minBytesPerChar);
5009 if (! tem)
5010 return XML_ERROR_NO_MEMORY;
5011 normalizePublicId(tem);
5012 parser->m_declEntity->publicId = tem;
5013 poolFinish(&dtd->pool);
5014 /* Don't suppress the default handler if we fell through from
5015 * the XML_ROLE_DOCTYPE_PUBLIC_ID case.
5016 */
5017 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_PUBLIC_ID)
5018 handleDefault = XML_FALSE;
5019 }
5020 break;
5021 case XML_ROLE_DOCTYPE_CLOSE:
5022 if (allowClosingDoctype != XML_TRUE) {
5023 /* Must not close doctype from within expanded parameter entities */
5024 return XML_ERROR_INVALID_TOKEN;
5025 }
5026
5027 if (parser->m_doctypeName) {
5028 parser->m_startDoctypeDeclHandler(
5029 parser->m_handlerArg, parser->m_doctypeName, parser->m_doctypeSysid,
5030 parser->m_doctypePubid, 0);
5031 poolClear(&parser->m_tempPool);
5032 handleDefault = XML_FALSE;
5033 }
5034 /* parser->m_doctypeSysid will be non-NULL in the case of a previous
5035 XML_ROLE_DOCTYPE_SYSTEM_ID, even if parser->m_startDoctypeDeclHandler
5036 was not set, indicating an external subset
5037 */
5038 #ifdef XML_DTD
5039 if (parser->m_doctypeSysid || parser->m_useForeignDTD) {
5040 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5041 dtd->hasParamEntityRefs = XML_TRUE;
5042 if (parser->m_paramEntityParsing
5043 && parser->m_externalEntityRefHandler) {
5044 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5045 externalSubsetName, sizeof(ENTITY));
5046 if (! entity) {
5047 /* The external subset name "#" will have already been
5048 * inserted into the hash table at the start of the
5049 * external entity parsing, so no allocation will happen
5050 * and lookup() cannot fail.
5051 */
5052 return XML_ERROR_NO_MEMORY; /* LCOV_EXCL_LINE */
5053 }
5054 if (parser->m_useForeignDTD)
5055 entity->base = parser->m_curBase;
5056 dtd->paramEntityRead = XML_FALSE;
5057 if (! parser->m_externalEntityRefHandler(
5058 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5059 entity->systemId, entity->publicId))
5060 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5061 if (dtd->paramEntityRead) {
5062 if (! dtd->standalone && parser->m_notStandaloneHandler
5063 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5064 return XML_ERROR_NOT_STANDALONE;
5065 }
5066 /* if we didn't read the foreign DTD then this means that there
5067 is no external subset and we must reset dtd->hasParamEntityRefs
5068 */
5069 else if (! parser->m_doctypeSysid)
5070 dtd->hasParamEntityRefs = hadParamEntityRefs;
5071 /* end of DTD - no need to update dtd->keepProcessing */
5072 }
5073 parser->m_useForeignDTD = XML_FALSE;
5074 }
5075 #endif /* XML_DTD */
5076 if (parser->m_endDoctypeDeclHandler) {
5077 parser->m_endDoctypeDeclHandler(parser->m_handlerArg);
5078 handleDefault = XML_FALSE;
5079 }
5080 break;
5081 case XML_ROLE_INSTANCE_START:
5082 #ifdef XML_DTD
5083 /* if there is no DOCTYPE declaration then now is the
5084 last chance to read the foreign DTD
5085 */
5086 if (parser->m_useForeignDTD) {
5087 XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs;
5088 dtd->hasParamEntityRefs = XML_TRUE;
5089 if (parser->m_paramEntityParsing
5090 && parser->m_externalEntityRefHandler) {
5091 ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5092 externalSubsetName, sizeof(ENTITY));
5093 if (! entity)
5094 return XML_ERROR_NO_MEMORY;
5095 entity->base = parser->m_curBase;
5096 dtd->paramEntityRead = XML_FALSE;
5097 if (! parser->m_externalEntityRefHandler(
5098 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5099 entity->systemId, entity->publicId))
5100 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5101 if (dtd->paramEntityRead) {
5102 if (! dtd->standalone && parser->m_notStandaloneHandler
5103 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5104 return XML_ERROR_NOT_STANDALONE;
5105 }
5106 /* if we didn't read the foreign DTD then this means that there
5107 is no external subset and we must reset dtd->hasParamEntityRefs
5108 */
5109 else
5110 dtd->hasParamEntityRefs = hadParamEntityRefs;
5111 /* end of DTD - no need to update dtd->keepProcessing */
5112 }
5113 }
5114 #endif /* XML_DTD */
5115 parser->m_processor = contentProcessor;
5116 return contentProcessor(parser, s, end, nextPtr);
5117 case XML_ROLE_ATTLIST_ELEMENT_NAME:
5118 parser->m_declElementType = getElementType(parser, enc, s, next);
5119 if (! parser->m_declElementType)
5120 return XML_ERROR_NO_MEMORY;
5121 goto checkAttListDeclHandler;
5122 case XML_ROLE_ATTRIBUTE_NAME:
5123 parser->m_declAttributeId = getAttributeId(parser, enc, s, next);
5124 if (! parser->m_declAttributeId)
5125 return XML_ERROR_NO_MEMORY;
5126 parser->m_declAttributeIsCdata = XML_FALSE;
5127 parser->m_declAttributeType = NULL;
5128 parser->m_declAttributeIsId = XML_FALSE;
5129 goto checkAttListDeclHandler;
5130 case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
5131 parser->m_declAttributeIsCdata = XML_TRUE;
5132 parser->m_declAttributeType = atypeCDATA;
5133 goto checkAttListDeclHandler;
5134 case XML_ROLE_ATTRIBUTE_TYPE_ID:
5135 parser->m_declAttributeIsId = XML_TRUE;
5136 parser->m_declAttributeType = atypeID;
5137 goto checkAttListDeclHandler;
5138 case XML_ROLE_ATTRIBUTE_TYPE_IDREF:
5139 parser->m_declAttributeType = atypeIDREF;
5140 goto checkAttListDeclHandler;
5141 case XML_ROLE_ATTRIBUTE_TYPE_IDREFS:
5142 parser->m_declAttributeType = atypeIDREFS;
5143 goto checkAttListDeclHandler;
5144 case XML_ROLE_ATTRIBUTE_TYPE_ENTITY:
5145 parser->m_declAttributeType = atypeENTITY;
5146 goto checkAttListDeclHandler;
5147 case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES:
5148 parser->m_declAttributeType = atypeENTITIES;
5149 goto checkAttListDeclHandler;
5150 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN:
5151 parser->m_declAttributeType = atypeNMTOKEN;
5152 goto checkAttListDeclHandler;
5153 case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS:
5154 parser->m_declAttributeType = atypeNMTOKENS;
5155 checkAttListDeclHandler:
5156 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5157 handleDefault = XML_FALSE;
5158 break;
5159 case XML_ROLE_ATTRIBUTE_ENUM_VALUE:
5160 case XML_ROLE_ATTRIBUTE_NOTATION_VALUE:
5161 if (dtd->keepProcessing && parser->m_attlistDeclHandler) {
5162 const XML_Char *prefix;
5163 if (parser->m_declAttributeType) {
5164 prefix = enumValueSep;
5165 } else {
5166 prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE ? notationPrefix
5167 : enumValueStart);
5168 }
5169 if (! poolAppendString(&parser->m_tempPool, prefix))
5170 return XML_ERROR_NO_MEMORY;
5171 if (! poolAppend(&parser->m_tempPool, enc, s, next))
5172 return XML_ERROR_NO_MEMORY;
5173 parser->m_declAttributeType = parser->m_tempPool.start;
5174 handleDefault = XML_FALSE;
5175 }
5176 break;
5177 case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
5178 case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
5179 if (dtd->keepProcessing) {
5180 if (! defineAttribute(parser->m_declElementType,
5181 parser->m_declAttributeId,
5182 parser->m_declAttributeIsCdata,
5183 parser->m_declAttributeIsId, 0, parser))
5184 return XML_ERROR_NO_MEMORY;
5185 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5186 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5187 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5188 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5189 /* Enumerated or Notation type */
5190 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5191 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5192 return XML_ERROR_NO_MEMORY;
5193 parser->m_declAttributeType = parser->m_tempPool.start;
5194 poolFinish(&parser->m_tempPool);
5195 }
5196 *eventEndPP = s;
5197 parser->m_attlistDeclHandler(
5198 parser->m_handlerArg, parser->m_declElementType->name,
5199 parser->m_declAttributeId->name, parser->m_declAttributeType, 0,
5200 role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE);
5201 handleDefault = XML_FALSE;
5202 }
5203 }
5204 poolClear(&parser->m_tempPool);
5205 break;
5206 case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
5207 case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
5208 if (dtd->keepProcessing) {
5209 const XML_Char *attVal;
5210 enum XML_Error result = storeAttributeValue(
5211 parser, enc, parser->m_declAttributeIsCdata,
5212 s + enc->minBytesPerChar, next - enc->minBytesPerChar, &dtd->pool,
5213 XML_ACCOUNT_NONE);
5214 if (result)
5215 return result;
5216 attVal = poolStart(&dtd->pool);
5217 poolFinish(&dtd->pool);
5218 /* ID attributes aren't allowed to have a default */
5219 if (! defineAttribute(
5220 parser->m_declElementType, parser->m_declAttributeId,
5221 parser->m_declAttributeIsCdata, XML_FALSE, attVal, parser))
5222 return XML_ERROR_NO_MEMORY;
5223 if (parser->m_attlistDeclHandler && parser->m_declAttributeType) {
5224 if (*parser->m_declAttributeType == XML_T(ASCII_LPAREN)
5225 || (*parser->m_declAttributeType == XML_T(ASCII_N)
5226 && parser->m_declAttributeType[1] == XML_T(ASCII_O))) {
5227 /* Enumerated or Notation type */
5228 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_RPAREN))
5229 || ! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
5230 return XML_ERROR_NO_MEMORY;
5231 parser->m_declAttributeType = parser->m_tempPool.start;
5232 poolFinish(&parser->m_tempPool);
5233 }
5234 *eventEndPP = s;
5235 parser->m_attlistDeclHandler(
5236 parser->m_handlerArg, parser->m_declElementType->name,
5237 parser->m_declAttributeId->name, parser->m_declAttributeType,
5238 attVal, role == XML_ROLE_FIXED_ATTRIBUTE_VALUE);
5239 poolClear(&parser->m_tempPool);
5240 handleDefault = XML_FALSE;
5241 }
5242 }
5243 break;
5244 case XML_ROLE_ENTITY_VALUE:
5245 if (dtd->keepProcessing) {
5246 #if XML_GE == 1
5247 // This will store the given replacement text in
5248 // parser->m_declEntity->textPtr.
5249 enum XML_Error result = callStoreEntityValue(
5250 parser, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar,
5251 XML_ACCOUNT_NONE);
5252 if (parser->m_declEntity) {
5253 parser->m_declEntity->textPtr = poolStart(&dtd->entityValuePool);
5254 parser->m_declEntity->textLen
5255 = (int)(poolLength(&dtd->entityValuePool));
5256 poolFinish(&dtd->entityValuePool);
5257 if (parser->m_entityDeclHandler) {
5258 *eventEndPP = s;
5259 parser->m_entityDeclHandler(
5260 parser->m_handlerArg, parser->m_declEntity->name,
5261 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5262 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5263 handleDefault = XML_FALSE;
5264 }
5265 } else
5266 poolDiscard(&dtd->entityValuePool);
5267 if (result != XML_ERROR_NONE)
5268 return result;
5269 #else
5270 // This will store "&entity123;" in parser->m_declEntity->textPtr
5271 // to end up as "&entity123;" in the handler.
5272 if (parser->m_declEntity != NULL) {
5273 const enum XML_Error result
5274 = storeSelfEntityValue(parser, parser->m_declEntity);
5275 if (result != XML_ERROR_NONE)
5276 return result;
5277
5278 if (parser->m_entityDeclHandler) {
5279 *eventEndPP = s;
5280 parser->m_entityDeclHandler(
5281 parser->m_handlerArg, parser->m_declEntity->name,
5282 parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
5283 parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
5284 handleDefault = XML_FALSE;
5285 }
5286 }
5287 #endif
5288 }
5289 break;
5290 case XML_ROLE_DOCTYPE_SYSTEM_ID:
5291 #ifdef XML_DTD
5292 parser->m_useForeignDTD = XML_FALSE;
5293 #endif /* XML_DTD */
5294 dtd->hasParamEntityRefs = XML_TRUE;
5295 if (parser->m_startDoctypeDeclHandler) {
5296 parser->m_doctypeSysid = poolStoreString(&parser->m_tempPool, enc,
5297 s + enc->minBytesPerChar,
5298 next - enc->minBytesPerChar);
5299 if (parser->m_doctypeSysid == NULL)
5300 return XML_ERROR_NO_MEMORY;
5301 poolFinish(&parser->m_tempPool);
5302 handleDefault = XML_FALSE;
5303 }
5304 #ifdef XML_DTD
5305 else
5306 /* use externalSubsetName to make parser->m_doctypeSysid non-NULL
5307 for the case where no parser->m_startDoctypeDeclHandler is set */
5308 parser->m_doctypeSysid = externalSubsetName;
5309 #endif /* XML_DTD */
5310 if (! dtd->standalone
5311 #ifdef XML_DTD
5312 && ! parser->m_paramEntityParsing
5313 #endif /* XML_DTD */
5314 && parser->m_notStandaloneHandler
5315 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5316 return XML_ERROR_NOT_STANDALONE;
5317 #ifndef XML_DTD
5318 break;
5319 #else /* XML_DTD */
5320 if (! parser->m_declEntity) {
5321 parser->m_declEntity = (ENTITY *)lookup(
5322 parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY));
5323 if (! parser->m_declEntity)
5324 return XML_ERROR_NO_MEMORY;
5325 parser->m_declEntity->publicId = NULL;
5326 }
5327 #endif /* XML_DTD */
5328 /* fall through */
5329 case XML_ROLE_ENTITY_SYSTEM_ID:
5330 if (dtd->keepProcessing && parser->m_declEntity) {
5331 parser->m_declEntity->systemId
5332 = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5333 next - enc->minBytesPerChar);
5334 if (! parser->m_declEntity->systemId)
5335 return XML_ERROR_NO_MEMORY;
5336 parser->m_declEntity->base = parser->m_curBase;
5337 poolFinish(&dtd->pool);
5338 /* Don't suppress the default handler if we fell through from
5339 * the XML_ROLE_DOCTYPE_SYSTEM_ID case.
5340 */
5341 if (parser->m_entityDeclHandler && role == XML_ROLE_ENTITY_SYSTEM_ID)
5342 handleDefault = XML_FALSE;
5343 }
5344 break;
5345 case XML_ROLE_ENTITY_COMPLETE:
5346 #if XML_GE == 0
5347 // This will store "&entity123;" in entity->textPtr
5348 // to end up as "&entity123;" in the handler.
5349 if (parser->m_declEntity != NULL) {
5350 const enum XML_Error result
5351 = storeSelfEntityValue(parser, parser->m_declEntity);
5352 if (result != XML_ERROR_NONE)
5353 return result;
5354 }
5355 #endif
5356 if (dtd->keepProcessing && parser->m_declEntity
5357 && parser->m_entityDeclHandler) {
5358 *eventEndPP = s;
5359 parser->m_entityDeclHandler(
5360 parser->m_handlerArg, parser->m_declEntity->name,
5361 parser->m_declEntity->is_param, 0, 0, parser->m_declEntity->base,
5362 parser->m_declEntity->systemId, parser->m_declEntity->publicId, 0);
5363 handleDefault = XML_FALSE;
5364 }
5365 break;
5366 case XML_ROLE_ENTITY_NOTATION_NAME:
5367 if (dtd->keepProcessing && parser->m_declEntity) {
5368 parser->m_declEntity->notation
5369 = poolStoreString(&dtd->pool, enc, s, next);
5370 if (! parser->m_declEntity->notation)
5371 return XML_ERROR_NO_MEMORY;
5372 poolFinish(&dtd->pool);
5373 if (parser->m_unparsedEntityDeclHandler) {
5374 *eventEndPP = s;
5375 parser->m_unparsedEntityDeclHandler(
5376 parser->m_handlerArg, parser->m_declEntity->name,
5377 parser->m_declEntity->base, parser->m_declEntity->systemId,
5378 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5379 handleDefault = XML_FALSE;
5380 } else if (parser->m_entityDeclHandler) {
5381 *eventEndPP = s;
5382 parser->m_entityDeclHandler(
5383 parser->m_handlerArg, parser->m_declEntity->name, 0, 0, 0,
5384 parser->m_declEntity->base, parser->m_declEntity->systemId,
5385 parser->m_declEntity->publicId, parser->m_declEntity->notation);
5386 handleDefault = XML_FALSE;
5387 }
5388 }
5389 break;
5390 case XML_ROLE_GENERAL_ENTITY_NAME: {
5391 if (XmlPredefinedEntityName(enc, s, next)) {
5392 parser->m_declEntity = NULL;
5393 break;
5394 }
5395 if (dtd->keepProcessing) {
5396 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5397 if (! name)
5398 return XML_ERROR_NO_MEMORY;
5399 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities,
5400 name, sizeof(ENTITY));
5401 if (! parser->m_declEntity)
5402 return XML_ERROR_NO_MEMORY;
5403 if (parser->m_declEntity->name != name) {
5404 poolDiscard(&dtd->pool);
5405 parser->m_declEntity = NULL;
5406 } else {
5407 poolFinish(&dtd->pool);
5408 parser->m_declEntity->publicId = NULL;
5409 parser->m_declEntity->is_param = XML_FALSE;
5410 /* if we have a parent parser or are reading an internal parameter
5411 entity, then the entity declaration is not considered "internal"
5412 */
5413 parser->m_declEntity->is_internal
5414 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5415 if (parser->m_entityDeclHandler)
5416 handleDefault = XML_FALSE;
5417 }
5418 } else {
5419 poolDiscard(&dtd->pool);
5420 parser->m_declEntity = NULL;
5421 }
5422 } break;
5423 case XML_ROLE_PARAM_ENTITY_NAME:
5424 #ifdef XML_DTD
5425 if (dtd->keepProcessing) {
5426 const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next);
5427 if (! name)
5428 return XML_ERROR_NO_MEMORY;
5429 parser->m_declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities,
5430 name, sizeof(ENTITY));
5431 if (! parser->m_declEntity)
5432 return XML_ERROR_NO_MEMORY;
5433 if (parser->m_declEntity->name != name) {
5434 poolDiscard(&dtd->pool);
5435 parser->m_declEntity = NULL;
5436 } else {
5437 poolFinish(&dtd->pool);
5438 parser->m_declEntity->publicId = NULL;
5439 parser->m_declEntity->is_param = XML_TRUE;
5440 /* if we have a parent parser or are reading an internal parameter
5441 entity, then the entity declaration is not considered "internal"
5442 */
5443 parser->m_declEntity->is_internal
5444 = ! (parser->m_parentParser || parser->m_openInternalEntities);
5445 if (parser->m_entityDeclHandler)
5446 handleDefault = XML_FALSE;
5447 }
5448 } else {
5449 poolDiscard(&dtd->pool);
5450 parser->m_declEntity = NULL;
5451 }
5452 #else /* not XML_DTD */
5453 parser->m_declEntity = NULL;
5454 #endif /* XML_DTD */
5455 break;
5456 case XML_ROLE_NOTATION_NAME:
5457 parser->m_declNotationPublicId = NULL;
5458 parser->m_declNotationName = NULL;
5459 if (parser->m_notationDeclHandler) {
5460 parser->m_declNotationName
5461 = poolStoreString(&parser->m_tempPool, enc, s, next);
5462 if (! parser->m_declNotationName)
5463 return XML_ERROR_NO_MEMORY;
5464 poolFinish(&parser->m_tempPool);
5465 handleDefault = XML_FALSE;
5466 }
5467 break;
5468 case XML_ROLE_NOTATION_PUBLIC_ID:
5469 if (! XmlIsPublicId(enc, s, next, eventPP))
5470 return XML_ERROR_PUBLICID;
5471 if (parser
5472 ->m_declNotationName) { /* means m_notationDeclHandler != NULL */
5473 XML_Char *tem = poolStoreString(&parser->m_tempPool, enc,
5474 s + enc->minBytesPerChar,
5475 next - enc->minBytesPerChar);
5476 if (! tem)
5477 return XML_ERROR_NO_MEMORY;
5478 normalizePublicId(tem);
5479 parser->m_declNotationPublicId = tem;
5480 poolFinish(&parser->m_tempPool);
5481 handleDefault = XML_FALSE;
5482 }
5483 break;
5484 case XML_ROLE_NOTATION_SYSTEM_ID:
5485 if (parser->m_declNotationName && parser->m_notationDeclHandler) {
5486 const XML_Char *systemId = poolStoreString(&parser->m_tempPool, enc,
5487 s + enc->minBytesPerChar,
5488 next - enc->minBytesPerChar);
5489 if (! systemId)
5490 return XML_ERROR_NO_MEMORY;
5491 *eventEndPP = s;
5492 parser->m_notationDeclHandler(
5493 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5494 systemId, parser->m_declNotationPublicId);
5495 handleDefault = XML_FALSE;
5496 }
5497 poolClear(&parser->m_tempPool);
5498 break;
5499 case XML_ROLE_NOTATION_NO_SYSTEM_ID:
5500 if (parser->m_declNotationPublicId && parser->m_notationDeclHandler) {
5501 *eventEndPP = s;
5502 parser->m_notationDeclHandler(
5503 parser->m_handlerArg, parser->m_declNotationName, parser->m_curBase,
5504 0, parser->m_declNotationPublicId);
5505 handleDefault = XML_FALSE;
5506 }
5507 poolClear(&parser->m_tempPool);
5508 break;
5509 case XML_ROLE_ERROR:
5510 switch (tok) {
5511 case XML_TOK_PARAM_ENTITY_REF:
5512 /* PE references in internal subset are
5513 not allowed within declarations. */
5514 return XML_ERROR_PARAM_ENTITY_REF;
5515 case XML_TOK_XML_DECL:
5516 return XML_ERROR_MISPLACED_XML_PI;
5517 default:
5518 return XML_ERROR_SYNTAX;
5519 }
5520 #ifdef XML_DTD
5521 case XML_ROLE_IGNORE_SECT: {
5522 enum XML_Error result;
5523 if (parser->m_defaultHandler)
5524 reportDefault(parser, enc, s, next);
5525 handleDefault = XML_FALSE;
5526 result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore);
5527 if (result != XML_ERROR_NONE)
5528 return result;
5529 else if (! next) {
5530 parser->m_processor = ignoreSectionProcessor;
5531 return result;
5532 }
5533 } break;
5534 #endif /* XML_DTD */
5535 case XML_ROLE_GROUP_OPEN:
5536 if (parser->m_prologState.level >= parser->m_groupSize) {
5537 if (parser->m_groupSize) {
5538 {
5539 /* Detect and prevent integer overflow */
5540 if (parser->m_groupSize > (unsigned int)(-1) / 2u) {
5541 return XML_ERROR_NO_MEMORY;
5542 }
5543
5544 char *const new_connector = (char *)REALLOC(
5545 parser, parser->m_groupConnector, parser->m_groupSize *= 2);
5546 if (new_connector == NULL) {
5547 parser->m_groupSize /= 2;
5548 return XML_ERROR_NO_MEMORY;
5549 }
5550 parser->m_groupConnector = new_connector;
5551 }
5552
5553 if (dtd->scaffIndex) {
5554 /* Detect and prevent integer overflow.
5555 * The preprocessor guard addresses the "always false" warning
5556 * from -Wtype-limits on platforms where
5557 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
5558 #if UINT_MAX >= SIZE_MAX
5559 if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) {
5560 return XML_ERROR_NO_MEMORY;
5561 }
5562 #endif
5563
5564 int *const new_scaff_index = (int *)REALLOC(
5565 parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int));
5566 if (new_scaff_index == NULL)
5567 return XML_ERROR_NO_MEMORY;
5568 dtd->scaffIndex = new_scaff_index;
5569 }
5570 } else {
5571 parser->m_groupConnector
5572 = (char *)MALLOC(parser, parser->m_groupSize = 32);
5573 if (! parser->m_groupConnector) {
5574 parser->m_groupSize = 0;
5575 return XML_ERROR_NO_MEMORY;
5576 }
5577 }
5578 }
5579 parser->m_groupConnector[parser->m_prologState.level] = 0;
5580 if (dtd->in_eldecl) {
5581 int myindex = nextScaffoldPart(parser);
5582 if (myindex < 0)
5583 return XML_ERROR_NO_MEMORY;
5584 assert(dtd->scaffIndex != NULL);
5585 dtd->scaffIndex[dtd->scaffLevel] = myindex;
5586 dtd->scaffLevel++;
5587 dtd->scaffold[myindex].type = XML_CTYPE_SEQ;
5588 if (parser->m_elementDeclHandler)
5589 handleDefault = XML_FALSE;
5590 }
5591 break;
5592 case XML_ROLE_GROUP_SEQUENCE:
5593 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_PIPE)
5594 return XML_ERROR_SYNTAX;
5595 parser->m_groupConnector[parser->m_prologState.level] = ASCII_COMMA;
5596 if (dtd->in_eldecl && parser->m_elementDeclHandler)
5597 handleDefault = XML_FALSE;
5598 break;
5599 case XML_ROLE_GROUP_CHOICE:
5600 if (parser->m_groupConnector[parser->m_prologState.level] == ASCII_COMMA)
5601 return XML_ERROR_SYNTAX;
5602 if (dtd->in_eldecl
5603 && ! parser->m_groupConnector[parser->m_prologState.level]
5604 && (dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5605 != XML_CTYPE_MIXED)) {
5606 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5607 = XML_CTYPE_CHOICE;
5608 if (parser->m_elementDeclHandler)
5609 handleDefault = XML_FALSE;
5610 }
5611 parser->m_groupConnector[parser->m_prologState.level] = ASCII_PIPE;
5612 break;
5613 case XML_ROLE_PARAM_ENTITY_REF:
5614 #ifdef XML_DTD
5615 case XML_ROLE_INNER_PARAM_ENTITY_REF:
5616 dtd->hasParamEntityRefs = XML_TRUE;
5617 if (! parser->m_paramEntityParsing)
5618 dtd->keepProcessing = dtd->standalone;
5619 else {
5620 const XML_Char *name;
5621 ENTITY *entity;
5622 name = poolStoreString(&dtd->pool, enc, s + enc->minBytesPerChar,
5623 next - enc->minBytesPerChar);
5624 if (! name)
5625 return XML_ERROR_NO_MEMORY;
5626 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
5627 poolDiscard(&dtd->pool);
5628 /* first, determine if a check for an existing declaration is needed;
5629 if yes, check that the entity exists, and that it is internal,
5630 otherwise call the skipped entity handler
5631 */
5632 if (parser->m_prologState.documentEntity
5633 && (dtd->standalone ? ! parser->m_openInternalEntities
5634 : ! dtd->hasParamEntityRefs)) {
5635 if (! entity)
5636 return XML_ERROR_UNDEFINED_ENTITY;
5637 else if (! entity->is_internal) {
5638 /* It's hard to exhaustively search the code to be sure,
5639 * but there doesn't seem to be a way of executing the
5640 * following line. There are two cases:
5641 *
5642 * If 'standalone' is false, the DTD must have no
5643 * parameter entities or we wouldn't have passed the outer
5644 * 'if' statement. That means the only entity in the hash
5645 * table is the external subset name "#" which cannot be
5646 * given as a parameter entity name in XML syntax, so the
5647 * lookup must have returned NULL and we don't even reach
5648 * the test for an internal entity.
5649 *
5650 * If 'standalone' is true, it does not seem to be
5651 * possible to create entities taking this code path that
5652 * are not internal entities, so fail the test above.
5653 *
5654 * Because this analysis is very uncertain, the code is
5655 * being left in place and merely removed from the
5656 * coverage test statistics.
5657 */
5658 return XML_ERROR_ENTITY_DECLARED_IN_PE; /* LCOV_EXCL_LINE */
5659 }
5660 } else if (! entity) {
5661 dtd->keepProcessing = dtd->standalone;
5662 /* cannot report skipped entities in declarations */
5663 if ((role == XML_ROLE_PARAM_ENTITY_REF)
5664 && parser->m_skippedEntityHandler) {
5665 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 1);
5666 handleDefault = XML_FALSE;
5667 }
5668 break;
5669 }
5670 if (entity->open)
5671 return XML_ERROR_RECURSIVE_ENTITY_REF;
5672 if (entity->textPtr) {
5673 enum XML_Error result;
5674 XML_Bool betweenDecl
5675 = (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
5676 result = processEntity(parser, entity, betweenDecl, ENTITY_INTERNAL);
5677 if (result != XML_ERROR_NONE)
5678 return result;
5679 handleDefault = XML_FALSE;
5680 break;
5681 }
5682 if (parser->m_externalEntityRefHandler) {
5683 dtd->paramEntityRead = XML_FALSE;
5684 entity->open = XML_TRUE;
5685 entityTrackingOnOpen(parser, entity, __LINE__);
5686 if (! parser->m_externalEntityRefHandler(
5687 parser->m_externalEntityRefHandlerArg, 0, entity->base,
5688 entity->systemId, entity->publicId)) {
5689 entityTrackingOnClose(parser, entity, __LINE__);
5690 entity->open = XML_FALSE;
5691 return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
5692 }
5693 entityTrackingOnClose(parser, entity, __LINE__);
5694 entity->open = XML_FALSE;
5695 handleDefault = XML_FALSE;
5696 if (! dtd->paramEntityRead) {
5697 dtd->keepProcessing = dtd->standalone;
5698 break;
5699 }
5700 } else {
5701 dtd->keepProcessing = dtd->standalone;
5702 break;
5703 }
5704 }
5705 #endif /* XML_DTD */
5706 if (! dtd->standalone && parser->m_notStandaloneHandler
5707 && ! parser->m_notStandaloneHandler(parser->m_handlerArg))
5708 return XML_ERROR_NOT_STANDALONE;
5709 break;
5710
5711 /* Element declaration stuff */
5712
5713 case XML_ROLE_ELEMENT_NAME:
5714 if (parser->m_elementDeclHandler) {
5715 parser->m_declElementType = getElementType(parser, enc, s, next);
5716 if (! parser->m_declElementType)
5717 return XML_ERROR_NO_MEMORY;
5718 dtd->scaffLevel = 0;
5719 dtd->scaffCount = 0;
5720 dtd->in_eldecl = XML_TRUE;
5721 handleDefault = XML_FALSE;
5722 }
5723 break;
5724
5725 case XML_ROLE_CONTENT_ANY:
5726 case XML_ROLE_CONTENT_EMPTY:
5727 if (dtd->in_eldecl) {
5728 if (parser->m_elementDeclHandler) {
5729 XML_Content *content
5730 = (XML_Content *)MALLOC(parser, sizeof(XML_Content));
5731 if (! content)
5732 return XML_ERROR_NO_MEMORY;
5733 content->quant = XML_CQUANT_NONE;
5734 content->name = NULL;
5735 content->numchildren = 0;
5736 content->children = NULL;
5737 content->type = ((role == XML_ROLE_CONTENT_ANY) ? XML_CTYPE_ANY
5738 : XML_CTYPE_EMPTY);
5739 *eventEndPP = s;
5740 parser->m_elementDeclHandler(
5741 parser->m_handlerArg, parser->m_declElementType->name, content);
5742 handleDefault = XML_FALSE;
5743 }
5744 dtd->in_eldecl = XML_FALSE;
5745 }
5746 break;
5747
5748 case XML_ROLE_CONTENT_PCDATA:
5749 if (dtd->in_eldecl) {
5750 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]].type
5751 = XML_CTYPE_MIXED;
5752 if (parser->m_elementDeclHandler)
5753 handleDefault = XML_FALSE;
5754 }
5755 break;
5756
5757 case XML_ROLE_CONTENT_ELEMENT:
5758 quant = XML_CQUANT_NONE;
5759 goto elementContent;
5760 case XML_ROLE_CONTENT_ELEMENT_OPT:
5761 quant = XML_CQUANT_OPT;
5762 goto elementContent;
5763 case XML_ROLE_CONTENT_ELEMENT_REP:
5764 quant = XML_CQUANT_REP;
5765 goto elementContent;
5766 case XML_ROLE_CONTENT_ELEMENT_PLUS:
5767 quant = XML_CQUANT_PLUS;
5768 elementContent:
5769 if (dtd->in_eldecl) {
5770 ELEMENT_TYPE *el;
5771 const XML_Char *name;
5772 size_t nameLen;
5773 const char *nxt
5774 = (quant == XML_CQUANT_NONE ? next : next - enc->minBytesPerChar);
5775 int myindex = nextScaffoldPart(parser);
5776 if (myindex < 0)
5777 return XML_ERROR_NO_MEMORY;
5778 dtd->scaffold[myindex].type = XML_CTYPE_NAME;
5779 dtd->scaffold[myindex].quant = quant;
5780 el = getElementType(parser, enc, s, nxt);
5781 if (! el)
5782 return XML_ERROR_NO_MEMORY;
5783 name = el->name;
5784 dtd->scaffold[myindex].name = name;
5785 nameLen = 0;
5786 for (; name[nameLen++];)
5787 ;
5788
5789 /* Detect and prevent integer overflow */
5790 if (nameLen > UINT_MAX - dtd->contentStringLen) {
5791 return XML_ERROR_NO_MEMORY;
5792 }
5793
5794 dtd->contentStringLen += (unsigned)nameLen;
5795 if (parser->m_elementDeclHandler)
5796 handleDefault = XML_FALSE;
5797 }
5798 break;
5799
5800 case XML_ROLE_GROUP_CLOSE:
5801 quant = XML_CQUANT_NONE;
5802 goto closeGroup;
5803 case XML_ROLE_GROUP_CLOSE_OPT:
5804 quant = XML_CQUANT_OPT;
5805 goto closeGroup;
5806 case XML_ROLE_GROUP_CLOSE_REP:
5807 quant = XML_CQUANT_REP;
5808 goto closeGroup;
5809 case XML_ROLE_GROUP_CLOSE_PLUS:
5810 quant = XML_CQUANT_PLUS;
5811 closeGroup:
5812 if (dtd->in_eldecl) {
5813 if (parser->m_elementDeclHandler)
5814 handleDefault = XML_FALSE;
5815 dtd->scaffLevel--;
5816 dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel]].quant = quant;
5817 if (dtd->scaffLevel == 0) {
5818 if (! handleDefault) {
5819 XML_Content *model = build_model(parser);
5820 if (! model)
5821 return XML_ERROR_NO_MEMORY;
5822 *eventEndPP = s;
5823 parser->m_elementDeclHandler(
5824 parser->m_handlerArg, parser->m_declElementType->name, model);
5825 }
5826 dtd->in_eldecl = XML_FALSE;
5827 dtd->contentStringLen = 0;
5828 }
5829 }
5830 break;
5831 /* End element declaration stuff */
5832
5833 case XML_ROLE_PI:
5834 if (! reportProcessingInstruction(parser, enc, s, next))
5835 return XML_ERROR_NO_MEMORY;
5836 handleDefault = XML_FALSE;
5837 break;
5838 case XML_ROLE_COMMENT:
5839 if (! reportComment(parser, enc, s, next))
5840 return XML_ERROR_NO_MEMORY;
5841 handleDefault = XML_FALSE;
5842 break;
5843 case XML_ROLE_NONE:
5844 switch (tok) {
5845 case XML_TOK_BOM:
5846 handleDefault = XML_FALSE;
5847 break;
5848 }
5849 break;
5850 case XML_ROLE_DOCTYPE_NONE:
5851 if (parser->m_startDoctypeDeclHandler)
5852 handleDefault = XML_FALSE;
5853 break;
5854 case XML_ROLE_ENTITY_NONE:
5855 if (dtd->keepProcessing && parser->m_entityDeclHandler)
5856 handleDefault = XML_FALSE;
5857 break;
5858 case XML_ROLE_NOTATION_NONE:
5859 if (parser->m_notationDeclHandler)
5860 handleDefault = XML_FALSE;
5861 break;
5862 case XML_ROLE_ATTLIST_NONE:
5863 if (dtd->keepProcessing && parser->m_attlistDeclHandler)
5864 handleDefault = XML_FALSE;
5865 break;
5866 case XML_ROLE_ELEMENT_NONE:
5867 if (parser->m_elementDeclHandler)
5868 handleDefault = XML_FALSE;
5869 break;
5870 } /* end of big switch */
5871
5872 if (handleDefault && parser->m_defaultHandler)
5873 reportDefault(parser, enc, s, next);
5874
5875 switch (parser->m_parsingStatus.parsing) {
5876 case XML_SUSPENDED:
5877 *nextPtr = next;
5878 return XML_ERROR_NONE;
5879 case XML_FINISHED:
5880 return XML_ERROR_ABORTED;
5881 case XML_PARSING:
5882 if (parser->m_reenter) {
5883 *nextPtr = next;
5884 return XML_ERROR_NONE;
5885 }
5886 /* Fall through */
5887 default:
5888 s = next;
5889 tok = XmlPrologTok(enc, s, end, &next);
5890 }
5891 }
5892 /* not reached */
5893 }
5894
5895 static enum XML_Error PTRCALL
5896 epilogProcessor(XML_Parser parser, const char *s, const char *end,
5897 const char **nextPtr) {
5898 parser->m_processor = epilogProcessor;
5899 parser->m_eventPtr = s;
5900 for (;;) {
5901 const char *next = NULL;
5902 int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
5903 #if XML_GE == 1
5904 if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
5905 XML_ACCOUNT_DIRECT)) {
5906 accountingOnAbort(parser);
5907 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
5908 }
5909 #endif
5910 parser->m_eventEndPtr = next;
5911 switch (tok) {
5912 /* report partial linebreak - it might be the last token */
5913 case -XML_TOK_PROLOG_S:
5914 if (parser->m_defaultHandler) {
5915 reportDefault(parser, parser->m_encoding, s, next);
5916 if (parser->m_parsingStatus.parsing == XML_FINISHED)
5917 return XML_ERROR_ABORTED;
5918 }
5919 *nextPtr = next;
5920 return XML_ERROR_NONE;
5921 case XML_TOK_NONE:
5922 *nextPtr = s;
5923 return XML_ERROR_NONE;
5924 case XML_TOK_PROLOG_S:
5925 if (parser->m_defaultHandler)
5926 reportDefault(parser, parser->m_encoding, s, next);
5927 break;
5928 case XML_TOK_PI:
5929 if (! reportProcessingInstruction(parser, parser->m_encoding, s, next))
5930 return XML_ERROR_NO_MEMORY;
5931 break;
5932 case XML_TOK_COMMENT:
5933 if (! reportComment(parser, parser->m_encoding, s, next))
5934 return XML_ERROR_NO_MEMORY;
5935 break;
5936 case XML_TOK_INVALID:
5937 parser->m_eventPtr = next;
5938 return XML_ERROR_INVALID_TOKEN;
5939 case XML_TOK_PARTIAL:
5940 if (! parser->m_parsingStatus.finalBuffer) {
5941 *nextPtr = s;
5942 return XML_ERROR_NONE;
5943 }
5944 return XML_ERROR_UNCLOSED_TOKEN;
5945 case XML_TOK_PARTIAL_CHAR:
5946 if (! parser->m_parsingStatus.finalBuffer) {
5947 *nextPtr = s;
5948 return XML_ERROR_NONE;
5949 }
5950 return XML_ERROR_PARTIAL_CHAR;
5951 default:
5952 return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
5953 }
5954 parser->m_eventPtr = s = next;
5955 switch (parser->m_parsingStatus.parsing) {
5956 case XML_SUSPENDED:
5957 *nextPtr = next;
5958 return XML_ERROR_NONE;
5959 case XML_FINISHED:
5960 return XML_ERROR_ABORTED;
5961 case XML_PARSING:
5962 if (parser->m_reenter) {
5963 return XML_ERROR_UNEXPECTED_STATE; // LCOV_EXCL_LINE
5964 }
5965 /* Fall through */
5966 default:;
5967 }
5968 }
5969 }
5970
5971 static enum XML_Error
5972 processEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl,
5973 enum EntityType type) {
5974 OPEN_INTERNAL_ENTITY *openEntity, **openEntityList, **freeEntityList;
5975 switch (type) {
5976 case ENTITY_INTERNAL:
5977 parser->m_processor = internalEntityProcessor;
5978 openEntityList = &parser->m_openInternalEntities;
5979 freeEntityList = &parser->m_freeInternalEntities;
5980 break;
5981 case ENTITY_ATTRIBUTE:
5982 openEntityList = &parser->m_openAttributeEntities;
5983 freeEntityList = &parser->m_freeAttributeEntities;
5984 break;
5985 case ENTITY_VALUE:
5986 openEntityList = &parser->m_openValueEntities;
5987 freeEntityList = &parser->m_freeValueEntities;
5988 break;
5989 /* default case serves merely as a safety net in case of a
5990 * wrong entityType. Therefore we exclude the following lines
5991 * from the test coverage.
5992 *
5993 * LCOV_EXCL_START
5994 */
5995 default:
5996 // Should not reach here
5997 assert(0);
5998 /* LCOV_EXCL_STOP */
5999 }
6000
6001 if (*freeEntityList) {
6002 openEntity = *freeEntityList;
6003 *freeEntityList = openEntity->next;
6004 } else {
6005 openEntity
6006 = (OPEN_INTERNAL_ENTITY *)MALLOC(parser, sizeof(OPEN_INTERNAL_ENTITY));
6007 if (! openEntity)
6008 return XML_ERROR_NO_MEMORY;
6009 }
6010 entity->open = XML_TRUE;
6011 entity->hasMore = XML_TRUE;
6012 #if XML_GE == 1
6013 entityTrackingOnOpen(parser, entity, __LINE__);
6014 #endif
6015 entity->processed = 0;
6016 openEntity->next = *openEntityList;
6017 *openEntityList = openEntity;
6018 openEntity->entity = entity;
6019 openEntity->type = type;
6020 openEntity->startTagLevel = parser->m_tagLevel;
6021 openEntity->betweenDecl = betweenDecl;
6022 openEntity->internalEventPtr = NULL;
6023 openEntity->internalEventEndPtr = NULL;
6024
6025 // Only internal entities make use of the reenter flag
6026 // therefore no need to set it for other entity types
6027 if (type == ENTITY_INTERNAL) {
6028 triggerReenter(parser);
6029 }
6030 return XML_ERROR_NONE;
6031 }
6032
6033 static enum XML_Error PTRCALL
6034 internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
6035 const char **nextPtr) {
6036 UNUSED_P(s);
6037 UNUSED_P(end);
6038 UNUSED_P(nextPtr);
6039 ENTITY *entity;
6040 const char *textStart, *textEnd;
6041 const char *next;
6042 enum XML_Error result;
6043 OPEN_INTERNAL_ENTITY *openEntity = parser->m_openInternalEntities;
6044 if (! openEntity)
6045 return XML_ERROR_UNEXPECTED_STATE;
6046
6047 entity = openEntity->entity;
6048
6049 // This will return early
6050 if (entity->hasMore) {
6051 textStart = ((const char *)entity->textPtr) + entity->processed;
6052 textEnd = (const char *)(entity->textPtr + entity->textLen);
6053 /* Set a safe default value in case 'next' does not get set */
6054 next = textStart;
6055
6056 if (entity->is_param) {
6057 int tok
6058 = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next);
6059 result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd,
6060 tok, next, &next, XML_FALSE, XML_FALSE,
6061 XML_ACCOUNT_ENTITY_EXPANSION);
6062 } else {
6063 result = doContent(parser, openEntity->startTagLevel,
6064 parser->m_internalEncoding, textStart, textEnd, &next,
6065 XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION);
6066 }
6067
6068 if (result != XML_ERROR_NONE)
6069 return result;
6070 // Check if entity is complete, if not, mark down how much of it is
6071 // processed
6072 if (textEnd != next
6073 && (parser->m_parsingStatus.parsing == XML_SUSPENDED
6074 || (parser->m_parsingStatus.parsing == XML_PARSING
6075 && parser->m_reenter))) {
6076 entity->processed = (int)(next - (const char *)entity->textPtr);
6077 return result;
6078 }
6079
6080 // Entity is complete. We cannot close it here since we need to first
6081 // process its possible inner entities (which are added to the
6082 // m_openInternalEntities during doProlog or doContent calls above)
6083 entity->hasMore = XML_FALSE;
6084 triggerReenter(parser);
6085 return result;
6086 } // End of entity processing, "if" block will return here
6087
6088 // Remove fully processed openEntity from open entity list.
6089 #if XML_GE == 1
6090 entityTrackingOnClose(parser, entity, __LINE__);
6091 #endif
6092 // openEntity is m_openInternalEntities' head, as we set it at the start of
6093 // this function and we skipped doProlog and doContent calls with hasMore set
6094 // to false. This means we can directly remove the head of
6095 // m_openInternalEntities
6096 assert(parser->m_openInternalEntities == openEntity);
6097 entity->open = XML_FALSE;
6098 parser->m_openInternalEntities = parser->m_openInternalEntities->next;
6099
6100 /* put openEntity back in list of free instances */
6101 openEntity->next = parser->m_freeInternalEntities;
6102 parser->m_freeInternalEntities = openEntity;
6103
6104 if (parser->m_openInternalEntities == NULL) {
6105 parser->m_processor = entity->is_param ? prologProcessor : contentProcessor;
6106 }
6107 triggerReenter(parser);
6108 return XML_ERROR_NONE;
6109 }
6110
6111 static enum XML_Error PTRCALL
6112 errorProcessor(XML_Parser parser, const char *s, const char *end,
6113 const char **nextPtr) {
6114 UNUSED_P(s);
6115 UNUSED_P(end);
6116 UNUSED_P(nextPtr);
6117 return parser->m_errorCode;
6118 }
6119
6120 static enum XML_Error
6121 storeAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6122 const char *ptr, const char *end, STRING_POOL *pool,
6123 enum XML_Account account) {
6124 const char *next = ptr;
6125 enum XML_Error result = XML_ERROR_NONE;
6126
6127 while (1) {
6128 if (! parser->m_openAttributeEntities) {
6129 result = appendAttributeValue(parser, enc, isCdata, next, end, pool,
6130 account, &next);
6131 } else {
6132 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openAttributeEntities;
6133 if (! openEntity)
6134 return XML_ERROR_UNEXPECTED_STATE;
6135
6136 ENTITY *const entity = openEntity->entity;
6137 const char *const textStart
6138 = ((const char *)entity->textPtr) + entity->processed;
6139 const char *const textEnd
6140 = (const char *)(entity->textPtr + entity->textLen);
6141 /* Set a safe default value in case 'next' does not get set */
6142 const char *nextInEntity = textStart;
6143 if (entity->hasMore) {
6144 result = appendAttributeValue(
6145 parser, parser->m_internalEncoding, isCdata, textStart, textEnd,
6146 pool, XML_ACCOUNT_ENTITY_EXPANSION, &nextInEntity);
6147 if (result != XML_ERROR_NONE)
6148 break;
6149 // Check if entity is complete, if not, mark down how much of it is
6150 // processed. A XML_SUSPENDED check here is not required as
6151 // appendAttributeValue will never suspend the parser.
6152 if (textEnd != nextInEntity) {
6153 entity->processed
6154 = (int)(nextInEntity - (const char *)entity->textPtr);
6155 continue;
6156 }
6157
6158 // Entity is complete. We cannot close it here since we need to first
6159 // process its possible inner entities (which are added to the
6160 // m_openAttributeEntities during appendAttributeValue)
6161 entity->hasMore = XML_FALSE;
6162 continue;
6163 } // End of entity processing, "if" block skips the rest
6164
6165 // Remove fully processed openEntity from open entity list.
6166 #if XML_GE == 1
6167 entityTrackingOnClose(parser, entity, __LINE__);
6168 #endif
6169 // openEntity is m_openAttributeEntities' head, since we set it at the
6170 // start of this function and because we skipped appendAttributeValue call
6171 // with hasMore set to false. This means we can directly remove the head
6172 // of m_openAttributeEntities
6173 assert(parser->m_openAttributeEntities == openEntity);
6174 entity->open = XML_FALSE;
6175 parser->m_openAttributeEntities = parser->m_openAttributeEntities->next;
6176
6177 /* put openEntity back in list of free instances */
6178 openEntity->next = parser->m_freeAttributeEntities;
6179 parser->m_freeAttributeEntities = openEntity;
6180 }
6181
6182 // Break if an error occurred or there is nothing left to process
6183 if (result || (parser->m_openAttributeEntities == NULL && end == next)) {
6184 break;
6185 }
6186 }
6187
6188 if (result)
6189 return result;
6190 if (! isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
6191 poolChop(pool);
6192 if (! poolAppendChar(pool, XML_T('\0')))
6193 return XML_ERROR_NO_MEMORY;
6194 return XML_ERROR_NONE;
6195 }
6196
6197 static enum XML_Error
6198 appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
6199 const char *ptr, const char *end, STRING_POOL *pool,
6200 enum XML_Account account, const char **nextPtr) {
6201 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6202 #ifndef XML_DTD
6203 UNUSED_P(account);
6204 #endif
6205
6206 for (;;) {
6207 const char *next
6208 = ptr; /* XmlAttributeValueTok doesn't always set the last arg */
6209 int tok = XmlAttributeValueTok(enc, ptr, end, &next);
6210 #if XML_GE == 1
6211 if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
6212 accountingOnAbort(parser);
6213 return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6214 }
6215 #endif
6216 switch (tok) {
6217 case XML_TOK_NONE:
6218 if (nextPtr) {
6219 *nextPtr = next;
6220 }
6221 return XML_ERROR_NONE;
6222 case XML_TOK_INVALID:
6223 if (enc == parser->m_encoding)
6224 parser->m_eventPtr = next;
6225 return XML_ERROR_INVALID_TOKEN;
6226 case XML_TOK_PARTIAL:
6227 if (enc == parser->m_encoding)
6228 parser->m_eventPtr = ptr;
6229 return XML_ERROR_INVALID_TOKEN;
6230 case XML_TOK_CHAR_REF: {
6231 XML_Char buf[XML_ENCODE_MAX];
6232 int i;
6233 int n = XmlCharRefNumber(enc, ptr);
6234 if (n < 0) {
6235 if (enc == parser->m_encoding)
6236 parser->m_eventPtr = ptr;
6237 return XML_ERROR_BAD_CHAR_REF;
6238 }
6239 if (! isCdata && n == 0x20 /* space */
6240 && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6241 break;
6242 n = XmlEncode(n, (ICHAR *)buf);
6243 /* The XmlEncode() functions can never return 0 here. That
6244 * error return happens if the code point passed in is either
6245 * negative or greater than or equal to 0x110000. The
6246 * XmlCharRefNumber() functions will all return a number
6247 * strictly less than 0x110000 or a negative value if an error
6248 * occurred. The negative value is intercepted above, so
6249 * XmlEncode() is never passed a value it might return an
6250 * error for.
6251 */
6252 for (i = 0; i < n; i++) {
6253 if (! poolAppendChar(pool, buf[i]))
6254 return XML_ERROR_NO_MEMORY;
6255 }
6256 } break;
6257 case XML_TOK_DATA_CHARS:
6258 if (! poolAppend(pool, enc, ptr, next))
6259 return XML_ERROR_NO_MEMORY;
6260 break;
6261 case XML_TOK_TRAILING_CR:
6262 next = ptr + enc->minBytesPerChar;
6263 /* fall through */
6264 case XML_TOK_ATTRIBUTE_VALUE_S:
6265 case XML_TOK_DATA_NEWLINE:
6266 if (! isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
6267 break;
6268 if (! poolAppendChar(pool, 0x20))
6269 return XML_ERROR_NO_MEMORY;
6270 break;
6271 case XML_TOK_ENTITY_REF: {
6272 const XML_Char *name;
6273 ENTITY *entity;
6274 char checkEntityDecl;
6275 XML_Char ch = (XML_Char)XmlPredefinedEntityName(
6276 enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
6277 if (ch) {
6278 #if XML_GE == 1
6279 /* NOTE: We are replacing 4-6 characters original input for 1 character
6280 * so there is no amplification and hence recording without
6281 * protection. */
6282 accountingDiffTolerated(parser, tok, (char *)&ch,
6283 ((char *)&ch) + sizeof(XML_Char), __LINE__,
6284 XML_ACCOUNT_ENTITY_EXPANSION);
6285 #endif /* XML_GE == 1 */
6286 if (! poolAppendChar(pool, ch))
6287 return XML_ERROR_NO_MEMORY;
6288 break;
6289 }
6290 name = poolStoreString(&parser->m_temp2Pool, enc,
6291 ptr + enc->minBytesPerChar,
6292 next - enc->minBytesPerChar);
6293 if (! name)
6294 return XML_ERROR_NO_MEMORY;
6295 entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0);
6296 poolDiscard(&parser->m_temp2Pool);
6297 /* First, determine if a check for an existing declaration is needed;
6298 if yes, check that the entity exists, and that it is internal.
6299 */
6300 if (pool == &dtd->pool) /* are we called from prolog? */
6301 checkEntityDecl =
6302 #ifdef XML_DTD
6303 parser->m_prologState.documentEntity &&
6304 #endif /* XML_DTD */
6305 (dtd->standalone ? ! parser->m_openInternalEntities
6306 : ! dtd->hasParamEntityRefs);
6307 else /* if (pool == &parser->m_tempPool): we are called from content */
6308 checkEntityDecl = ! dtd->hasParamEntityRefs || dtd->standalone;
6309 if (checkEntityDecl) {
6310 if (! entity)
6311 return XML_ERROR_UNDEFINED_ENTITY;
6312 else if (! entity->is_internal)
6313 return XML_ERROR_ENTITY_DECLARED_IN_PE;
6314 } else if (! entity) {
6315 /* Cannot report skipped entity here - see comments on
6316 parser->m_skippedEntityHandler.
6317 if (parser->m_skippedEntityHandler)
6318 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6319 */
6320 /* Cannot call the default handler because this would be
6321 out of sync with the call to the startElementHandler.
6322 if ((pool == &parser->m_tempPool) && parser->m_defaultHandler)
6323 reportDefault(parser, enc, ptr, next);
6324 */
6325 break;
6326 }
6327 if (entity->open) {
6328 if (enc == parser->m_encoding) {
6329 /* It does not appear that this line can be executed.
6330 *
6331 * The "if (entity->open)" check catches recursive entity
6332 * definitions. In order to be called with an open
6333 * entity, it must have gone through this code before and
6334 * been through the recursive call to
6335 * appendAttributeValue() some lines below. That call
6336 * sets the local encoding ("enc") to the parser's
6337 * internal encoding (internal_utf8 or internal_utf16),
6338 * which can never be the same as the principle encoding.
6339 * It doesn't appear there is another code path that gets
6340 * here with entity->open being TRUE.
6341 *
6342 * Since it is not certain that this logic is watertight,
6343 * we keep the line and merely exclude it from coverage
6344 * tests.
6345 */
6346 parser->m_eventPtr = ptr; /* LCOV_EXCL_LINE */
6347 }
6348 return XML_ERROR_RECURSIVE_ENTITY_REF;
6349 }
6350 if (entity->notation) {
6351 if (enc == parser->m_encoding)
6352 parser->m_eventPtr = ptr;
6353 return XML_ERROR_BINARY_ENTITY_REF;
6354 }
6355 if (! entity->textPtr) {
6356 if (enc == parser->m_encoding)
6357 parser->m_eventPtr = ptr;
6358 return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
6359 } else {
6360 enum XML_Error result;
6361 result = processEntity(parser, entity, XML_FALSE, ENTITY_ATTRIBUTE);
6362 if ((result == XML_ERROR_NONE) && (nextPtr != NULL)) {
6363 *nextPtr = next;
6364 }
6365 return result;
6366 }
6367 } break;
6368 default:
6369 /* The only token returned by XmlAttributeValueTok() that does
6370 * not have an explicit case here is XML_TOK_PARTIAL_CHAR.
6371 * Getting that would require an entity name to contain an
6372 * incomplete XML character (e.g. \xE2\x82); however previous
6373 * tokenisers will have already recognised and rejected such
6374 * names before XmlAttributeValueTok() gets a look-in. This
6375 * default case should be retained as a safety net, but the code
6376 * excluded from coverage tests.
6377 *
6378 * LCOV_EXCL_START
6379 */
6380 if (enc == parser->m_encoding)
6381 parser->m_eventPtr = ptr;
6382 return XML_ERROR_UNEXPECTED_STATE;
6383 /* LCOV_EXCL_STOP */
6384 }
6385 ptr = next;
6386 }
6387 /* not reached */
6388 }
6389
6390 #if XML_GE == 1
6391 static enum XML_Error
6392 storeEntityValue(XML_Parser parser, const ENCODING *enc,
6393 const char *entityTextPtr, const char *entityTextEnd,
6394 enum XML_Account account, const char **nextPtr) {
6395 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6396 STRING_POOL *pool = &(dtd->entityValuePool);
6397 enum XML_Error result = XML_ERROR_NONE;
6398 # ifdef XML_DTD
6399 int oldInEntityValue = parser->m_prologState.inEntityValue;
6400 parser->m_prologState.inEntityValue = 1;
6401 # else
6402 UNUSED_P(account);
6403 # endif /* XML_DTD */
6404 /* never return Null for the value argument in EntityDeclHandler,
6405 since this would indicate an external entity; therefore we
6406 have to make sure that entityValuePool.start is not null */
6407 if (! pool->blocks) {
6408 if (! poolGrow(pool))
6409 return XML_ERROR_NO_MEMORY;
6410 }
6411
6412 const char *next;
6413 for (;;) {
6414 next
6415 = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
6416 int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
6417
6418 if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
6419 account)) {
6420 accountingOnAbort(parser);
6421 result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
6422 goto endEntityValue;
6423 }
6424
6425 switch (tok) {
6426 case XML_TOK_PARAM_ENTITY_REF:
6427 # ifdef XML_DTD
6428 if (parser->m_isParamEntity || enc != parser->m_encoding) {
6429 const XML_Char *name;
6430 ENTITY *entity;
6431 name = poolStoreString(&parser->m_tempPool, enc,
6432 entityTextPtr + enc->minBytesPerChar,
6433 next - enc->minBytesPerChar);
6434 if (! name) {
6435 result = XML_ERROR_NO_MEMORY;
6436 goto endEntityValue;
6437 }
6438 entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0);
6439 poolDiscard(&parser->m_tempPool);
6440 if (! entity) {
6441 /* not a well-formedness error - see XML 1.0: WFC Entity Declared */
6442 /* cannot report skipped entity here - see comments on
6443 parser->m_skippedEntityHandler
6444 if (parser->m_skippedEntityHandler)
6445 parser->m_skippedEntityHandler(parser->m_handlerArg, name, 0);
6446 */
6447 dtd->keepProcessing = dtd->standalone;
6448 goto endEntityValue;
6449 }
6450 if (entity->open || (entity == parser->m_declEntity)) {
6451 if (enc == parser->m_encoding)
6452 parser->m_eventPtr = entityTextPtr;
6453 result = XML_ERROR_RECURSIVE_ENTITY_REF;
6454 goto endEntityValue;
6455 }
6456 if (entity->systemId) {
6457 if (parser->m_externalEntityRefHandler) {
6458 dtd->paramEntityRead = XML_FALSE;
6459 entity->open = XML_TRUE;
6460 entityTrackingOnOpen(parser, entity, __LINE__);
6461 if (! parser->m_externalEntityRefHandler(
6462 parser->m_externalEntityRefHandlerArg, 0, entity->base,
6463 entity->systemId, entity->publicId)) {
6464 entityTrackingOnClose(parser, entity, __LINE__);
6465 entity->open = XML_FALSE;
6466 result = XML_ERROR_EXTERNAL_ENTITY_HANDLING;
6467 goto endEntityValue;
6468 }
6469 entityTrackingOnClose(parser, entity, __LINE__);
6470 entity->open = XML_FALSE;
6471 if (! dtd->paramEntityRead)
6472 dtd->keepProcessing = dtd->standalone;
6473 } else
6474 dtd->keepProcessing = dtd->standalone;
6475 } else {
6476 result = processEntity(parser, entity, XML_FALSE, ENTITY_VALUE);
6477 goto endEntityValue;
6478 }
6479 break;
6480 }
6481 # endif /* XML_DTD */
6482 /* In the internal subset, PE references are not legal
6483 within markup declarations, e.g entity values in this case. */
6484 parser->m_eventPtr = entityTextPtr;
6485 result = XML_ERROR_PARAM_ENTITY_REF;
6486 goto endEntityValue;
6487 case XML_TOK_NONE:
6488 result = XML_ERROR_NONE;
6489 goto endEntityValue;
6490 case XML_TOK_ENTITY_REF:
6491 case XML_TOK_DATA_CHARS:
6492 if (! poolAppend(pool, enc, entityTextPtr, next)) {
6493 result = XML_ERROR_NO_MEMORY;
6494 goto endEntityValue;
6495 }
6496 break;
6497 case XML_TOK_TRAILING_CR:
6498 next = entityTextPtr + enc->minBytesPerChar;
6499 /* fall through */
6500 case XML_TOK_DATA_NEWLINE:
6501 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6502 result = XML_ERROR_NO_MEMORY;
6503 goto endEntityValue;
6504 }
6505 *(pool->ptr)++ = 0xA;
6506 break;
6507 case XML_TOK_CHAR_REF: {
6508 XML_Char buf[XML_ENCODE_MAX];
6509 int i;
6510 int n = XmlCharRefNumber(enc, entityTextPtr);
6511 if (n < 0) {
6512 if (enc == parser->m_encoding)
6513 parser->m_eventPtr = entityTextPtr;
6514 result = XML_ERROR_BAD_CHAR_REF;
6515 goto endEntityValue;
6516 }
6517 n = XmlEncode(n, (ICHAR *)buf);
6518 /* The XmlEncode() functions can never return 0 here. That
6519 * error return happens if the code point passed in is either
6520 * negative or greater than or equal to 0x110000. The
6521 * XmlCharRefNumber() functions will all return a number
6522 * strictly less than 0x110000 or a negative value if an error
6523 * occurred. The negative value is intercepted above, so
6524 * XmlEncode() is never passed a value it might return an
6525 * error for.
6526 */
6527 for (i = 0; i < n; i++) {
6528 if (pool->end == pool->ptr && ! poolGrow(pool)) {
6529 result = XML_ERROR_NO_MEMORY;
6530 goto endEntityValue;
6531 }
6532 *(pool->ptr)++ = buf[i];
6533 }
6534 } break;
6535 case XML_TOK_PARTIAL:
6536 if (enc == parser->m_encoding)
6537 parser->m_eventPtr = entityTextPtr;
6538 result = XML_ERROR_INVALID_TOKEN;
6539 goto endEntityValue;
6540 case XML_TOK_INVALID:
6541 if (enc == parser->m_encoding)
6542 parser->m_eventPtr = next;
6543 result = XML_ERROR_INVALID_TOKEN;
6544 goto endEntityValue;
6545 default:
6546 /* This default case should be unnecessary -- all the tokens
6547 * that XmlEntityValueTok() can return have their own explicit
6548 * cases -- but should be retained for safety. We do however
6549 * exclude it from the coverage statistics.
6550 *
6551 * LCOV_EXCL_START
6552 */
6553 if (enc == parser->m_encoding)
6554 parser->m_eventPtr = entityTextPtr;
6555 result = XML_ERROR_UNEXPECTED_STATE;
6556 goto endEntityValue;
6557 /* LCOV_EXCL_STOP */
6558 }
6559 entityTextPtr = next;
6560 }
6561 endEntityValue:
6562 # ifdef XML_DTD
6563 parser->m_prologState.inEntityValue = oldInEntityValue;
6564 # endif /* XML_DTD */
6565 // If 'nextPtr' is given, it should be updated during the processing
6566 if (nextPtr != NULL) {
6567 *nextPtr = next;
6568 }
6569 return result;
6570 }
6571
6572 static enum XML_Error
6573 callStoreEntityValue(XML_Parser parser, const ENCODING *enc,
6574 const char *entityTextPtr, const char *entityTextEnd,
6575 enum XML_Account account) {
6576 const char *next = entityTextPtr;
6577 enum XML_Error result = XML_ERROR_NONE;
6578 while (1) {
6579 if (! parser->m_openValueEntities) {
6580 result
6581 = storeEntityValue(parser, enc, next, entityTextEnd, account, &next);
6582 } else {
6583 OPEN_INTERNAL_ENTITY *const openEntity = parser->m_openValueEntities;
6584 if (! openEntity)
6585 return XML_ERROR_UNEXPECTED_STATE;
6586
6587 ENTITY *const entity = openEntity->entity;
6588 const char *const textStart
6589 = ((const char *)entity->textPtr) + entity->processed;
6590 const char *const textEnd
6591 = (const char *)(entity->textPtr + entity->textLen);
6592 /* Set a safe default value in case 'next' does not get set */
6593 const char *nextInEntity = textStart;
6594 if (entity->hasMore) {
6595 result = storeEntityValue(parser, parser->m_internalEncoding, textStart,
6596 textEnd, XML_ACCOUNT_ENTITY_EXPANSION,
6597 &nextInEntity);
6598 if (result != XML_ERROR_NONE)
6599 break;
6600 // Check if entity is complete, if not, mark down how much of it is
6601 // processed. A XML_SUSPENDED check here is not required as
6602 // appendAttributeValue will never suspend the parser.
6603 if (textEnd != nextInEntity) {
6604 entity->processed
6605 = (int)(nextInEntity - (const char *)entity->textPtr);
6606 continue;
6607 }
6608
6609 // Entity is complete. We cannot close it here since we need to first
6610 // process its possible inner entities (which are added to the
6611 // m_openValueEntities during storeEntityValue)
6612 entity->hasMore = XML_FALSE;
6613 continue;
6614 } // End of entity processing, "if" block skips the rest
6615
6616 // Remove fully processed openEntity from open entity list.
6617 # if XML_GE == 1
6618 entityTrackingOnClose(parser, entity, __LINE__);
6619 # endif
6620 // openEntity is m_openValueEntities' head, since we set it at the
6621 // start of this function and because we skipped storeEntityValue call
6622 // with hasMore set to false. This means we can directly remove the head
6623 // of m_openValueEntities
6624 assert(parser->m_openValueEntities == openEntity);
6625 entity->open = XML_FALSE;
6626 parser->m_openValueEntities = parser->m_openValueEntities->next;
6627
6628 /* put openEntity back in list of free instances */
6629 openEntity->next = parser->m_freeValueEntities;
6630 parser->m_freeValueEntities = openEntity;
6631 }
6632
6633 // Break if an error occurred or there is nothing left to process
6634 if (result
6635 || (parser->m_openValueEntities == NULL && entityTextEnd == next)) {
6636 break;
6637 }
6638 }
6639
6640 return result;
6641 }
6642
6643 #else /* XML_GE == 0 */
6644
6645 static enum XML_Error
6646 storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
6647 // This will store "&entity123;" in entity->textPtr
6648 // to end up as "&entity123;" in the handler.
6649 const char *const entity_start = "&";
6650 const char *const entity_end = ";";
6651
6652 STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
6653 if (! poolAppendString(pool, entity_start)
6654 || ! poolAppendString(pool, entity->name)
6655 || ! poolAppendString(pool, entity_end)) {
6656 poolDiscard(pool);
6657 return XML_ERROR_NO_MEMORY;
6658 }
6659
6660 entity->textPtr = poolStart(pool);
6661 entity->textLen = (int)(poolLength(pool));
6662 poolFinish(pool);
6663
6664 return XML_ERROR_NONE;
6665 }
6666
6667 #endif /* XML_GE == 0 */
6668
6669 static void FASTCALL
6670 normalizeLines(XML_Char *s) {
6671 XML_Char *p;
6672 for (;; s++) {
6673 if (*s == XML_T('\0'))
6674 return;
6675 if (*s == 0xD)
6676 break;
6677 }
6678 p = s;
6679 do {
6680 if (*s == 0xD) {
6681 *p++ = 0xA;
6682 if (*++s == 0xA)
6683 s++;
6684 } else
6685 *p++ = *s++;
6686 } while (*s);
6687 *p = XML_T('\0');
6688 }
6689
6690 static int
6691 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
6692 const char *start, const char *end) {
6693 const XML_Char *target;
6694 XML_Char *data;
6695 const char *tem;
6696 if (! parser->m_processingInstructionHandler) {
6697 if (parser->m_defaultHandler)
6698 reportDefault(parser, enc, start, end);
6699 return 1;
6700 }
6701 start += enc->minBytesPerChar * 2;
6702 tem = start + XmlNameLength(enc, start);
6703 target = poolStoreString(&parser->m_tempPool, enc, start, tem);
6704 if (! target)
6705 return 0;
6706 poolFinish(&parser->m_tempPool);
6707 data = poolStoreString(&parser->m_tempPool, enc, XmlSkipS(enc, tem),
6708 end - enc->minBytesPerChar * 2);
6709 if (! data)
6710 return 0;
6711 normalizeLines(data);
6712 parser->m_processingInstructionHandler(parser->m_handlerArg, target, data);
6713 poolClear(&parser->m_tempPool);
6714 return 1;
6715 }
6716
6717 static int
6718 reportComment(XML_Parser parser, const ENCODING *enc, const char *start,
6719 const char *end) {
6720 XML_Char *data;
6721 if (! parser->m_commentHandler) {
6722 if (parser->m_defaultHandler)
6723 reportDefault(parser, enc, start, end);
6724 return 1;
6725 }
6726 data = poolStoreString(&parser->m_tempPool, enc,
6727 start + enc->minBytesPerChar * 4,
6728 end - enc->minBytesPerChar * 3);
6729 if (! data)
6730 return 0;
6731 normalizeLines(data);
6732 parser->m_commentHandler(parser->m_handlerArg, data);
6733 poolClear(&parser->m_tempPool);
6734 return 1;
6735 }
6736
6737 static void
6738 reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
6739 const char *end) {
6740 if (MUST_CONVERT(enc, s)) {
6741 enum XML_Convert_Result convert_res;
6742 const char **eventPP;
6743 const char **eventEndPP;
6744 if (enc == parser->m_encoding) {
6745 eventPP = &parser->m_eventPtr;
6746 eventEndPP = &parser->m_eventEndPtr;
6747 } else {
6748 /* To get here, two things must be true; the parser must be
6749 * using a character encoding that is not the same as the
6750 * encoding passed in, and the encoding passed in must need
6751 * conversion to the internal format (UTF-8 unless XML_UNICODE
6752 * is defined). The only occasions on which the encoding passed
6753 * in is not the same as the parser's encoding are when it is
6754 * the internal encoding (e.g. a previously defined parameter
6755 * entity, already converted to internal format). This by
6756 * definition doesn't need conversion, so the whole branch never
6757 * gets executed.
6758 *
6759 * For safety's sake we don't delete these lines and merely
6760 * exclude them from coverage statistics.
6761 *
6762 * LCOV_EXCL_START
6763 */
6764 eventPP = &(parser->m_openInternalEntities->internalEventPtr);
6765 eventEndPP = &(parser->m_openInternalEntities->internalEventEndPtr);
6766 /* LCOV_EXCL_STOP */
6767 }
6768 do {
6769 ICHAR *dataPtr = (ICHAR *)parser->m_dataBuf;
6770 convert_res
6771 = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)parser->m_dataBufEnd);
6772 *eventEndPP = s;
6773 parser->m_defaultHandler(parser->m_handlerArg, parser->m_dataBuf,
6774 (int)(dataPtr - (ICHAR *)parser->m_dataBuf));
6775 *eventPP = s;
6776 } while ((convert_res != XML_CONVERT_COMPLETED)
6777 && (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
6778 } else
6779 parser->m_defaultHandler(
6780 parser->m_handlerArg, (const XML_Char *)s,
6781 (int)((const XML_Char *)end - (const XML_Char *)s));
6782 }
6783
6784 static int
6785 defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata,
6786 XML_Bool isId, const XML_Char *value, XML_Parser parser) {
6787 DEFAULT_ATTRIBUTE *att;
6788 if (value || isId) {
6789 /* The handling of default attributes gets messed up if we have
6790 a default which duplicates a non-default. */
6791 int i;
6792 for (i = 0; i < type->nDefaultAtts; i++)
6793 if (attId == type->defaultAtts[i].id)
6794 return 1;
6795 if (isId && ! type->idAtt && ! attId->xmlns)
6796 type->idAtt = attId;
6797 }
6798 if (type->nDefaultAtts == type->allocDefaultAtts) {
6799 if (type->allocDefaultAtts == 0) {
6800 type->allocDefaultAtts = 8;
6801 type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(
6802 parser, type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
6803 if (! type->defaultAtts) {
6804 type->allocDefaultAtts = 0;
6805 return 0;
6806 }
6807 } else {
6808 DEFAULT_ATTRIBUTE *temp;
6809
6810 /* Detect and prevent integer overflow */
6811 if (type->allocDefaultAtts > INT_MAX / 2) {
6812 return 0;
6813 }
6814
6815 int count = type->allocDefaultAtts * 2;
6816
6817 /* Detect and prevent integer overflow.
6818 * The preprocessor guard addresses the "always false" warning
6819 * from -Wtype-limits on platforms where
6820 * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
6821 #if UINT_MAX >= SIZE_MAX
6822 if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) {
6823 return 0;
6824 }
6825 #endif
6826
6827 temp = (DEFAULT_ATTRIBUTE *)REALLOC(parser, type->defaultAtts,
6828 (count * sizeof(DEFAULT_ATTRIBUTE)));
6829 if (temp == NULL)
6830 return 0;
6831 type->allocDefaultAtts = count;
6832 type->defaultAtts = temp;
6833 }
6834 }
6835 att = type->defaultAtts + type->nDefaultAtts;
6836 att->id = attId;
6837 att->value = value;
6838 att->isCdata = isCdata;
6839 if (! isCdata)
6840 attId->maybeTokenized = XML_TRUE;
6841 type->nDefaultAtts += 1;
6842 return 1;
6843 }
6844
6845 static int
6846 setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) {
6847 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6848 const XML_Char *name;
6849 for (name = elementType->name; *name; name++) {
6850 if (*name == XML_T(ASCII_COLON)) {
6851 PREFIX *prefix;
6852 const XML_Char *s;
6853 for (s = elementType->name; s != name; s++) {
6854 if (! poolAppendChar(&dtd->pool, *s))
6855 return 0;
6856 }
6857 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6858 return 0;
6859 prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
6860 sizeof(PREFIX));
6861 if (! prefix)
6862 return 0;
6863 if (prefix->name == poolStart(&dtd->pool))
6864 poolFinish(&dtd->pool);
6865 else
6866 poolDiscard(&dtd->pool);
6867 elementType->prefix = prefix;
6868 break;
6869 }
6870 }
6871 return 1;
6872 }
6873
6874 static ATTRIBUTE_ID *
6875 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
6876 const char *end) {
6877 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6878 ATTRIBUTE_ID *id;
6879 const XML_Char *name;
6880 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6881 return NULL;
6882 name = poolStoreString(&dtd->pool, enc, start, end);
6883 if (! name)
6884 return NULL;
6885 /* skip quotation mark - its storage will be reused (like in name[-1]) */
6886 ++name;
6887 id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
6888 sizeof(ATTRIBUTE_ID));
6889 if (! id)
6890 return NULL;
6891 if (id->name != name)
6892 poolDiscard(&dtd->pool);
6893 else {
6894 poolFinish(&dtd->pool);
6895 if (! parser->m_ns)
6896 ;
6897 else if (name[0] == XML_T(ASCII_x) && name[1] == XML_T(ASCII_m)
6898 && name[2] == XML_T(ASCII_l) && name[3] == XML_T(ASCII_n)
6899 && name[4] == XML_T(ASCII_s)
6900 && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
6901 if (name[5] == XML_T('\0'))
6902 id->prefix = &dtd->defaultPrefix;
6903 else
6904 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6,
6905 sizeof(PREFIX));
6906 id->xmlns = XML_TRUE;
6907 } else {
6908 int i;
6909 for (i = 0; name[i]; i++) {
6910 /* attributes without prefix are *not* in the default namespace */
6911 if (name[i] == XML_T(ASCII_COLON)) {
6912 int j;
6913 for (j = 0; j < i; j++) {
6914 if (! poolAppendChar(&dtd->pool, name[j]))
6915 return NULL;
6916 }
6917 if (! poolAppendChar(&dtd->pool, XML_T('\0')))
6918 return NULL;
6919 id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes,
6920 poolStart(&dtd->pool), sizeof(PREFIX));
6921 if (! id->prefix)
6922 return NULL;
6923 if (id->prefix->name == poolStart(&dtd->pool))
6924 poolFinish(&dtd->pool);
6925 else
6926 poolDiscard(&dtd->pool);
6927 break;
6928 }
6929 }
6930 }
6931 }
6932 return id;
6933 }
6934
6935 #define CONTEXT_SEP XML_T(ASCII_FF)
6936
6937 static const XML_Char *
6938 getContext(XML_Parser parser) {
6939 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
6940 HASH_TABLE_ITER iter;
6941 XML_Bool needSep = XML_FALSE;
6942
6943 if (dtd->defaultPrefix.binding) {
6944 int i;
6945 int len;
6946 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
6947 return NULL;
6948 len = dtd->defaultPrefix.binding->uriLen;
6949 if (parser->m_namespaceSeparator)
6950 len--;
6951 for (i = 0; i < len; i++) {
6952 if (! poolAppendChar(&parser->m_tempPool,
6953 dtd->defaultPrefix.binding->uri[i])) {
6954 /* Because of memory caching, I don't believe this line can be
6955 * executed.
6956 *
6957 * This is part of a loop copying the default prefix binding
6958 * URI into the parser's temporary string pool. Previously,
6959 * that URI was copied into the same string pool, with a
6960 * terminating NUL character, as part of setContext(). When
6961 * the pool was cleared, that leaves a block definitely big
6962 * enough to hold the URI on the free block list of the pool.
6963 * The URI copy in getContext() therefore cannot run out of
6964 * memory.
6965 *
6966 * If the pool is used between the setContext() and
6967 * getContext() calls, the worst it can do is leave a bigger
6968 * block on the front of the free list. Given that this is
6969 * all somewhat inobvious and program logic can be changed, we
6970 * don't delete the line but we do exclude it from the test
6971 * coverage statistics.
6972 */
6973 return NULL; /* LCOV_EXCL_LINE */
6974 }
6975 }
6976 needSep = XML_TRUE;
6977 }
6978
6979 hashTableIterInit(&iter, &(dtd->prefixes));
6980 for (;;) {
6981 int i;
6982 int len;
6983 const XML_Char *s;
6984 PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
6985 if (! prefix)
6986 break;
6987 if (! prefix->binding) {
6988 /* This test appears to be (justifiable) paranoia. There does
6989 * not seem to be a way of injecting a prefix without a binding
6990 * that doesn't get errored long before this function is called.
6991 * The test should remain for safety's sake, so we instead
6992 * exclude the following line from the coverage statistics.
6993 */
6994 continue; /* LCOV_EXCL_LINE */
6995 }
6996 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
6997 return NULL;
6998 for (s = prefix->name; *s; s++)
6999 if (! poolAppendChar(&parser->m_tempPool, *s))
7000 return NULL;
7001 if (! poolAppendChar(&parser->m_tempPool, XML_T(ASCII_EQUALS)))
7002 return NULL;
7003 len = prefix->binding->uriLen;
7004 if (parser->m_namespaceSeparator)
7005 len--;
7006 for (i = 0; i < len; i++)
7007 if (! poolAppendChar(&parser->m_tempPool, prefix->binding->uri[i]))
7008 return NULL;
7009 needSep = XML_TRUE;
7010 }
7011
7012 hashTableIterInit(&iter, &(dtd->generalEntities));
7013 for (;;) {
7014 const XML_Char *s;
7015 ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
7016 if (! e)
7017 break;
7018 if (! e->open)
7019 continue;
7020 if (needSep && ! poolAppendChar(&parser->m_tempPool, CONTEXT_SEP))
7021 return NULL;
7022 for (s = e->name; *s; s++)
7023 if (! poolAppendChar(&parser->m_tempPool, *s))
7024 return 0;
7025 needSep = XML_TRUE;
7026 }
7027
7028 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7029 return NULL;
7030 return parser->m_tempPool.start;
7031 }
7032
7033 static XML_Bool
7034 setContext(XML_Parser parser, const XML_Char *context) {
7035 if (context == NULL) {
7036 return XML_FALSE;
7037 }
7038
7039 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
7040 const XML_Char *s = context;
7041
7042 while (*context != XML_T('\0')) {
7043 if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
7044 ENTITY *e;
7045 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7046 return XML_FALSE;
7047 e = (ENTITY *)lookup(parser, &dtd->generalEntities,
7048 poolStart(&parser->m_tempPool), 0);
7049 if (e)
7050 e->open = XML_TRUE;
7051 if (*s != XML_T('\0'))
7052 s++;
7053 context = s;
7054 poolDiscard(&parser->m_tempPool);
7055 } else if (*s == XML_T(ASCII_EQUALS)) {
7056 PREFIX *prefix;
7057 if (poolLength(&parser->m_tempPool) == 0)
7058 prefix = &dtd->defaultPrefix;
7059 else {
7060 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7061 return XML_FALSE;
7062 prefix
7063 = (PREFIX *)lookup(parser, &dtd->prefixes,
7064 poolStart(&parser->m_tempPool), sizeof(PREFIX));
7065 if (! prefix)
7066 return XML_FALSE;
7067 if (prefix->name == poolStart(&parser->m_tempPool)) {
7068 prefix->name = poolCopyString(&dtd->pool, prefix->name);
7069 if (! prefix->name)
7070 return XML_FALSE;
7071 }
7072 poolDiscard(&parser->m_tempPool);
7073 }
7074 for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0');
7075 context++)
7076 if (! poolAppendChar(&parser->m_tempPool, *context))
7077 return XML_FALSE;
7078 if (! poolAppendChar(&parser->m_tempPool, XML_T('\0')))
7079 return XML_FALSE;
7080 if (addBinding(parser, prefix, NULL, poolStart(&parser->m_tempPool),
7081 &parser->m_inheritedBindings)
7082 != XML_ERROR_NONE)
7083 return XML_FALSE;
7084 poolDiscard(&parser->m_tempPool);
7085 if (*context != XML_T('\0'))
7086 ++context;
7087 s = context;
7088 } else {
7089 if (! poolAppendChar(&parser->m_tempPool, *s))
7090 return XML_FALSE;
7091 s++;
7092 }
7093 }
7094 return XML_TRUE;
7095 }
7096
7097 static void FASTCALL
7098 normalizePublicId(XML_Char *publicId) {
7099 XML_Char *p = publicId;
7100 XML_Char *s;
7101 for (s = publicId; *s; s++) {
7102 switch (*s) {
7103 case 0x20:
7104 case 0xD:
7105 case 0xA:
7106 if (p != publicId && p[-1] != 0x20)
7107 *p++ = 0x20;
7108 break;
7109 default:
7110 *p++ = *s;
7111 }
7112 }
7113 if (p != publicId && p[-1] == 0x20)
7114 --p;
7115 *p = XML_T('\0');
7116 }
7117
7118 static DTD *
7119 dtdCreate(const XML_Memory_Handling_Suite *ms) {
7120 DTD *p = ms->malloc_fcn(sizeof(DTD));
7121 if (p == NULL)
7122 return p;
7123 poolInit(&(p->pool), ms);
7124 poolInit(&(p->entityValuePool), ms);
7125 hashTableInit(&(p->generalEntities), ms);
7126 hashTableInit(&(p->elementTypes), ms);
7127 hashTableInit(&(p->attributeIds), ms);
7128 hashTableInit(&(p->prefixes), ms);
7129 #ifdef XML_DTD
7130 p->paramEntityRead = XML_FALSE;
7131 hashTableInit(&(p->paramEntities), ms);
7132 #endif /* XML_DTD */
7133 p->defaultPrefix.name = NULL;
7134 p->defaultPrefix.binding = NULL;
7135
7136 p->in_eldecl = XML_FALSE;
7137 p->scaffIndex = NULL;
7138 p->scaffold = NULL;
7139 p->scaffLevel = 0;
7140 p->scaffSize = 0;
7141 p->scaffCount = 0;
7142 p->contentStringLen = 0;
7143
7144 p->keepProcessing = XML_TRUE;
7145 p->hasParamEntityRefs = XML_FALSE;
7146 p->standalone = XML_FALSE;
7147 return p;
7148 }
7149
7150 static void
7151 dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) {
7152 HASH_TABLE_ITER iter;
7153 hashTableIterInit(&iter, &(p->elementTypes));
7154 for (;;) {
7155 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7156 if (! e)
7157 break;
7158 if (e->allocDefaultAtts != 0)
7159 ms->free_fcn(e->defaultAtts);
7160 }
7161 hashTableClear(&(p->generalEntities));
7162 #ifdef XML_DTD
7163 p->paramEntityRead = XML_FALSE;
7164 hashTableClear(&(p->paramEntities));
7165 #endif /* XML_DTD */
7166 hashTableClear(&(p->elementTypes));
7167 hashTableClear(&(p->attributeIds));
7168 hashTableClear(&(p->prefixes));
7169 poolClear(&(p->pool));
7170 poolClear(&(p->entityValuePool));
7171 p->defaultPrefix.name = NULL;
7172 p->defaultPrefix.binding = NULL;
7173
7174 p->in_eldecl = XML_FALSE;
7175
7176 ms->free_fcn(p->scaffIndex);
7177 p->scaffIndex = NULL;
7178 ms->free_fcn(p->scaffold);
7179 p->scaffold = NULL;
7180
7181 p->scaffLevel = 0;
7182 p->scaffSize = 0;
7183 p->scaffCount = 0;
7184 p->contentStringLen = 0;
7185
7186 p->keepProcessing = XML_TRUE;
7187 p->hasParamEntityRefs = XML_FALSE;
7188 p->standalone = XML_FALSE;
7189 }
7190
7191 static void
7192 dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) {
7193 HASH_TABLE_ITER iter;
7194 hashTableIterInit(&iter, &(p->elementTypes));
7195 for (;;) {
7196 ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7197 if (! e)
7198 break;
7199 if (e->allocDefaultAtts != 0)
7200 ms->free_fcn(e->defaultAtts);
7201 }
7202 hashTableDestroy(&(p->generalEntities));
7203 #ifdef XML_DTD
7204 hashTableDestroy(&(p->paramEntities));
7205 #endif /* XML_DTD */
7206 hashTableDestroy(&(p->elementTypes));
7207 hashTableDestroy(&(p->attributeIds));
7208 hashTableDestroy(&(p->prefixes));
7209 poolDestroy(&(p->pool));
7210 poolDestroy(&(p->entityValuePool));
7211 if (isDocEntity) {
7212 ms->free_fcn(p->scaffIndex);
7213 ms->free_fcn(p->scaffold);
7214 }
7215 ms->free_fcn(p);
7216 }
7217
7218 /* Do a deep copy of the DTD. Return 0 for out of memory, non-zero otherwise.
7219 The new DTD has already been initialized.
7220 */
7221 static int
7222 dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
7223 const XML_Memory_Handling_Suite *ms) {
7224 HASH_TABLE_ITER iter;
7225
7226 /* Copy the prefix table. */
7227
7228 hashTableIterInit(&iter, &(oldDtd->prefixes));
7229 for (;;) {
7230 const XML_Char *name;
7231 const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
7232 if (! oldP)
7233 break;
7234 name = poolCopyString(&(newDtd->pool), oldP->name);
7235 if (! name)
7236 return 0;
7237 if (! lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX)))
7238 return 0;
7239 }
7240
7241 hashTableIterInit(&iter, &(oldDtd->attributeIds));
7242
7243 /* Copy the attribute id table. */
7244
7245 for (;;) {
7246 ATTRIBUTE_ID *newA;
7247 const XML_Char *name;
7248 const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
7249
7250 if (! oldA)
7251 break;
7252 /* Remember to allocate the scratch byte before the name. */
7253 if (! poolAppendChar(&(newDtd->pool), XML_T('\0')))
7254 return 0;
7255 name = poolCopyString(&(newDtd->pool), oldA->name);
7256 if (! name)
7257 return 0;
7258 ++name;
7259 newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name,
7260 sizeof(ATTRIBUTE_ID));
7261 if (! newA)
7262 return 0;
7263 newA->maybeTokenized = oldA->maybeTokenized;
7264 if (oldA->prefix) {
7265 newA->xmlns = oldA->xmlns;
7266 if (oldA->prefix == &oldDtd->defaultPrefix)
7267 newA->prefix = &newDtd->defaultPrefix;
7268 else
7269 newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7270 oldA->prefix->name, 0);
7271 }
7272 }
7273
7274 /* Copy the element type table. */
7275
7276 hashTableIterInit(&iter, &(oldDtd->elementTypes));
7277
7278 for (;;) {
7279 int i;
7280 ELEMENT_TYPE *newE;
7281 const XML_Char *name;
7282 const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
7283 if (! oldE)
7284 break;
7285 name = poolCopyString(&(newDtd->pool), oldE->name);
7286 if (! name)
7287 return 0;
7288 newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name,
7289 sizeof(ELEMENT_TYPE));
7290 if (! newE)
7291 return 0;
7292 if (oldE->nDefaultAtts) {
7293 /* Detect and prevent integer overflow.
7294 * The preprocessor guard addresses the "always false" warning
7295 * from -Wtype-limits on platforms where
7296 * sizeof(int) < sizeof(size_t), e.g. on x86_64. */
7297 #if UINT_MAX >= SIZE_MAX
7298 if ((size_t)oldE->nDefaultAtts
7299 > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) {
7300 return 0;
7301 }
7302 #endif
7303 newE->defaultAtts
7304 = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
7305 if (! newE->defaultAtts) {
7306 return 0;
7307 }
7308 }
7309 if (oldE->idAtt)
7310 newE->idAtt = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds),
7311 oldE->idAtt->name, 0);
7312 newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
7313 if (oldE->prefix)
7314 newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes),
7315 oldE->prefix->name, 0);
7316 for (i = 0; i < newE->nDefaultAtts; i++) {
7317 newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(
7318 oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
7319 newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
7320 if (oldE->defaultAtts[i].value) {
7321 newE->defaultAtts[i].value
7322 = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
7323 if (! newE->defaultAtts[i].value)
7324 return 0;
7325 } else
7326 newE->defaultAtts[i].value = NULL;
7327 }
7328 }
7329
7330 /* Copy the entity tables. */
7331 if (! copyEntityTable(oldParser, &(newDtd->generalEntities), &(newDtd->pool),
7332 &(oldDtd->generalEntities)))
7333 return 0;
7334
7335 #ifdef XML_DTD
7336 if (! copyEntityTable(oldParser, &(newDtd->paramEntities), &(newDtd->pool),
7337 &(oldDtd->paramEntities)))
7338 return 0;
7339 newDtd->paramEntityRead = oldDtd->paramEntityRead;
7340 #endif /* XML_DTD */
7341
7342 newDtd->keepProcessing = oldDtd->keepProcessing;
7343 newDtd->hasParamEntityRefs = oldDtd->hasParamEntityRefs;
7344 newDtd->standalone = oldDtd->standalone;
7345
7346 /* Don't want deep copying for scaffolding */
7347 newDtd->in_eldecl = oldDtd->in_eldecl;
7348 newDtd->scaffold = oldDtd->scaffold;
7349 newDtd->contentStringLen = oldDtd->contentStringLen;
7350 newDtd->scaffSize = oldDtd->scaffSize;
7351 newDtd->scaffLevel = oldDtd->scaffLevel;
7352 newDtd->scaffIndex = oldDtd->scaffIndex;
7353
7354 return 1;
7355 } /* End dtdCopy */
7356
7357 static int
7358 copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
7359 STRING_POOL *newPool, const HASH_TABLE *oldTable) {
7360 HASH_TABLE_ITER iter;
7361 const XML_Char *cachedOldBase = NULL;
7362 const XML_Char *cachedNewBase = NULL;
7363
7364 hashTableIterInit(&iter, oldTable);
7365
7366 for (;;) {
7367 ENTITY *newE;
7368 const XML_Char *name;
7369 const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
7370 if (! oldE)
7371 break;
7372 name = poolCopyString(newPool, oldE->name);
7373 if (! name)
7374 return 0;
7375 newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY));
7376 if (! newE)
7377 return 0;
7378 if (oldE->systemId) {
7379 const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
7380 if (! tem)
7381 return 0;
7382 newE->systemId = tem;
7383 if (oldE->base) {
7384 if (oldE->base == cachedOldBase)
7385 newE->base = cachedNewBase;
7386 else {
7387 cachedOldBase = oldE->base;
7388 tem = poolCopyString(newPool, cachedOldBase);
7389 if (! tem)
7390 return 0;
7391 cachedNewBase = newE->base = tem;
7392 }
7393 }
7394 if (oldE->publicId) {
7395 tem = poolCopyString(newPool, oldE->publicId);
7396 if (! tem)
7397 return 0;
7398 newE->publicId = tem;
7399 }
7400 } else {
7401 const XML_Char *tem
7402 = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
7403 if (! tem)
7404 return 0;
7405 newE->textPtr = tem;
7406 newE->textLen = oldE->textLen;
7407 }
7408 if (oldE->notation) {
7409 const XML_Char *tem = poolCopyString(newPool, oldE->notation);
7410 if (! tem)
7411 return 0;
7412 newE->notation = tem;
7413 }
7414 newE->is_param = oldE->is_param;
7415 newE->is_internal = oldE->is_internal;
7416 }
7417 return 1;
7418 }
7419
/* Initial hash table power: lookup() gives a newly created table
   (size_t)1 << INIT_POWER slots. */
#define INIT_POWER 6
7421
7422 static XML_Bool FASTCALL
7423 keyeq(KEY s1, KEY s2) {
7424 for (; *s1 == *s2; s1++, s2++)
7425 if (*s1 == 0)
7426 return XML_TRUE;
7427 return XML_FALSE;
7428 }
7429
7430 static size_t
7431 keylen(KEY s) {
7432 size_t len = 0;
7433 for (; *s; s++, len++)
7434 ;
7435 return len;
7436 }
7437
7438 static void
7439 copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key) {
7440 key->k[0] = 0;
7441 key->k[1] = get_hash_secret_salt(parser);
7442 }
7443
7444 static unsigned long FASTCALL
7445 hash(XML_Parser parser, KEY s) {
7446 struct siphash state;
7447 struct sipkey key;
7448 (void)sip24_valid;
7449 copy_salt_to_sipkey(parser, &key);
7450 sip24_init(&state, &key);
7451 sip24_update(&state, s, keylen(s) * sizeof(XML_Char));
7452 return (unsigned long)sip24_final(&state);
7453 }
7454
/* Find the entry called `name` in `table`, optionally creating it.
 *
 * - If an entry whose name compares equal (keyeq) exists, it is returned.
 * - Otherwise, if createSize is 0, NULL is returned and the table is left
 *   unchanged.
 * - Otherwise a zero-filled entry of createSize bytes is allocated, its
 *   `name` field is set to the `name` pointer itself (the string is not
 *   copied here), and the new entry is returned.
 *
 * Returns NULL as well when an allocation fails or the table cannot be
 * enlarged any further.
 *
 * Entries live directly in the slot array table->v; a collision is resolved
 * by walking backwards through the array in increments of PROBE_STEP(...),
 * wrapping around at index 0.
 */
static NAMED *
lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) {
  size_t i;
  if (table->size == 0) {
    /* Empty table: nothing to find; allocate the slot array on demand. */
    size_t tsize;
    if (! createSize)
      return NULL;
    table->power = INIT_POWER;
    /* table->size is a power of 2 */
    table->size = (size_t)1 << INIT_POWER;
    tsize = table->size * sizeof(NAMED *);
    table->v = table->mem->malloc_fcn(tsize);
    if (! table->v) {
      /* Keep the table marked as empty so a later call retries. */
      table->size = 0;
      return NULL;
    }
    memset(table->v, 0, tsize);
    /* All slots are free, so the home slot can be used without probing. */
    i = hash(parser, name) & ((unsigned long)table->size - 1);
  } else {
    unsigned long h = hash(parser, name);
    unsigned long mask = (unsigned long)table->size - 1;
    unsigned char step = 0; /* 0 means "not computed yet" */
    i = h & mask;
    /* Probe until the name is found or a free slot is reached. */
    while (table->v[i]) {
      if (keyeq(name, table->v[i]->name))
        return table->v[i];
      if (! step)
        step = PROBE_STEP(h, mask, table->power);
      /* Move back by `step` slots, wrapping around below index 0. */
      i < step ? (i += table->size - step) : (i -= step);
    }
    if (! createSize)
      return NULL;

    /* check for overflow (table is half full) */
    if (table->used >> (table->power - 1)) {
      unsigned char newPower = table->power + 1;

      /* Detect and prevent invalid shift */
      if (newPower >= sizeof(unsigned long) * 8 /* bits per byte */) {
        return NULL;
      }

      size_t newSize = (size_t)1 << newPower;
      unsigned long newMask = (unsigned long)newSize - 1;

      /* Detect and prevent integer overflow */
      if (newSize > (size_t)(-1) / sizeof(NAMED *)) {
        return NULL;
      }

      size_t tsize = newSize * sizeof(NAMED *);
      NAMED **newV = table->mem->malloc_fcn(tsize);
      if (! newV)
        return NULL;
      memset(newV, 0, tsize);
      /* Re-insert every existing entry into the doubled slot array. */
      for (i = 0; i < table->size; i++)
        if (table->v[i]) {
          unsigned long newHash = hash(parser, table->v[i]->name);
          size_t j = newHash & newMask;
          step = 0;
          while (newV[j]) {
            if (! step)
              step = PROBE_STEP(newHash, newMask, newPower);
            j < step ? (j += newSize - step) : (j -= step);
          }
          newV[j] = table->v[i];
        }
      table->mem->free_fcn(table->v);
      table->v = newV;
      table->power = newPower;
      table->size = newSize;
      /* The free slot found earlier belonged to the old array; search for a
         free slot again in the new one. */
      i = h & newMask;
      step = 0;
      while (table->v[i]) {
        if (! step)
          step = PROBE_STEP(h, newMask, newPower);
        i < step ? (i += newSize - step) : (i -= step);
      }
    }
  }
  /* Slot i is free: create the new, zero-initialised entry there. */
  table->v[i] = table->mem->malloc_fcn(createSize);
  if (! table->v[i])
    return NULL;
  memset(table->v[i], 0, createSize);
  table->v[i]->name = name;
  (table->used)++;
  return table->v[i];
}
7543
7544 static void FASTCALL
7545 hashTableClear(HASH_TABLE *table) {
7546 size_t i;
7547 for (i = 0; i < table->size; i++) {
7548 table->mem->free_fcn(table->v[i]);
7549 table->v[i] = NULL;
7550 }
7551 table->used = 0;
7552 }
7553
7554 static void FASTCALL
7555 hashTableDestroy(HASH_TABLE *table) {
7556 size_t i;
7557 for (i = 0; i < table->size; i++)
7558 table->mem->free_fcn(table->v[i]);
7559 table->mem->free_fcn(table->v);
7560 }
7561
7562 static void FASTCALL
7563 hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) {
7564 p->power = 0;
7565 p->size = 0;
7566 p->used = 0;
7567 p->v = NULL;
7568 p->mem = ms;
7569 }
7570
7571 static void FASTCALL
7572 hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) {
7573 iter->p = table->v;
7574 iter->end = iter->p ? iter->p + table->size : NULL;
7575 }
7576
7577 static NAMED *FASTCALL
7578 hashTableIterNext(HASH_TABLE_ITER *iter) {
7579 while (iter->p != iter->end) {
7580 NAMED *tem = *(iter->p)++;
7581 if (tem)
7582 return tem;
7583 }
7584 return NULL;
7585 }
7586
7587 static void FASTCALL
7588 poolInit(STRING_POOL *pool, const XML_Memory_Handling_Suite *ms) {
7589 pool->blocks = NULL;
7590 pool->freeBlocks = NULL;
7591 pool->start = NULL;
7592 pool->ptr = NULL;
7593 pool->end = NULL;
7594 pool->mem = ms;
7595 }
7596
7597 static void FASTCALL
7598 poolClear(STRING_POOL *pool) {
7599 if (! pool->freeBlocks)
7600 pool->freeBlocks = pool->blocks;
7601 else {
7602 BLOCK *p = pool->blocks;
7603 while (p) {
7604 BLOCK *tem = p->next;
7605 p->next = pool->freeBlocks;
7606 pool->freeBlocks = p;
7607 p = tem;
7608 }
7609 }
7610 pool->blocks = NULL;
7611 pool->start = NULL;
7612 pool->ptr = NULL;
7613 pool->end = NULL;
7614 }
7615
7616 static void FASTCALL
7617 poolDestroy(STRING_POOL *pool) {
7618 BLOCK *p = pool->blocks;
7619 while (p) {
7620 BLOCK *tem = p->next;
7621 pool->mem->free_fcn(p);
7622 p = tem;
7623 }
7624 p = pool->freeBlocks;
7625 while (p) {
7626 BLOCK *tem = p->next;
7627 pool->mem->free_fcn(p);
7628 p = tem;
7629 }
7630 }
7631
7632 static XML_Char *
7633 poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7634 const char *end) {
7635 if (! pool->ptr && ! poolGrow(pool))
7636 return NULL;
7637 for (;;) {
7638 const enum XML_Convert_Result convert_res = XmlConvert(
7639 enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
7640 if ((convert_res == XML_CONVERT_COMPLETED)
7641 || (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
7642 break;
7643 if (! poolGrow(pool))
7644 return NULL;
7645 }
7646 return pool->start;
7647 }
7648
7649 static const XML_Char *FASTCALL
7650 poolCopyString(STRING_POOL *pool, const XML_Char *s) {
7651 do {
7652 if (! poolAppendChar(pool, *s))
7653 return NULL;
7654 } while (*s++);
7655 s = pool->start;
7656 poolFinish(pool);
7657 return s;
7658 }
7659
7660 static const XML_Char *
7661 poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) {
7662 if (! pool->ptr && ! poolGrow(pool)) {
7663 /* The following line is unreachable given the current usage of
7664 * poolCopyStringN(). Currently it is called from exactly one
7665 * place to copy the text of a simple general entity. By that
7666 * point, the name of the entity is already stored in the pool, so
7667 * pool->ptr cannot be NULL.
7668 *
7669 * If poolCopyStringN() is used elsewhere as it well might be,
7670 * this line may well become executable again. Regardless, this
7671 * sort of check shouldn't be removed lightly, so we just exclude
7672 * it from the coverage statistics.
7673 */
7674 return NULL; /* LCOV_EXCL_LINE */
7675 }
7676 for (; n > 0; --n, s++) {
7677 if (! poolAppendChar(pool, *s))
7678 return NULL;
7679 }
7680 s = pool->start;
7681 poolFinish(pool);
7682 return s;
7683 }
7684
7685 static const XML_Char *FASTCALL
7686 poolAppendString(STRING_POOL *pool, const XML_Char *s) {
7687 while (*s) {
7688 if (! poolAppendChar(pool, *s))
7689 return NULL;
7690 s++;
7691 }
7692 return pool->start;
7693 }
7694
7695 static XML_Char *
7696 poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
7697 const char *end) {
7698 if (! poolAppend(pool, enc, ptr, end))
7699 return NULL;
7700 if (pool->ptr == pool->end && ! poolGrow(pool))
7701 return NULL;
7702 *(pool->ptr)++ = 0;
7703 return pool->start;
7704 }
7705
7706 static size_t
7707 poolBytesToAllocateFor(int blockSize) {
7708 /* Unprotected math would be:
7709 ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char);
7710 **
7711 ** Detect overflow, avoiding _signed_ overflow undefined behavior
7712 ** For a + b * c we check b * c in isolation first, so that addition of a
7713 ** on top has no chance of making us accept a small non-negative number
7714 */
7715 const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */
7716
7717 if (blockSize <= 0)
7718 return 0;
7719
7720 if (blockSize > (int)(INT_MAX / stretch))
7721 return 0;
7722
7723 {
7724 const int stretchedBlockSize = blockSize * (int)stretch;
7725 const int bytesToAllocate
7726 = (int)(offsetof(BLOCK, s) + (unsigned)stretchedBlockSize);
7727 if (bytesToAllocate < 0)
7728 return 0;
7729
7730 return (size_t)bytesToAllocate;
7731 }
7732 }
7733
/* Make more room for the string currently under construction in the pool.
 *
 * Strategies, tried in this order:
 *
 *  1. Reuse the first block of the free list:
 *     a) if pool->start is NULL, that block simply becomes the current
 *        block;
 *     b) if that block is larger than the current one, the current block's
 *        content is copied into it.
 *  2. If the string under construction begins at the very start of the
 *     newest block, enlarge that block in place with realloc to twice the
 *     current size.
 *  3. Otherwise allocate a new block (INIT_BLOCK_SIZE characters at least,
 *     else twice the current size) and copy the partial string into it.
 *
 * pool->start, pool->ptr and pool->end are updated to refer to the new
 * storage.  Returns XML_TRUE on success; XML_FALSE when a size computation
 * would overflow or an allocation fails.
 */
static XML_Bool FASTCALL
poolGrow(STRING_POOL *pool) {
  if (pool->freeBlocks) {
    if (pool->start == 0) {
      /* Case 1a: take the first free block as the (only) current block. */
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = pool->freeBlocks->next;
      pool->blocks->next = NULL;
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      pool->ptr = pool->start;
      return XML_TRUE;
    }
    if (pool->end - pool->start < pool->freeBlocks->size) {
      /* Case 1b: the first free block is bigger; move it to the front of
         the used list and carry the current content over. */
      BLOCK *tem = pool->freeBlocks->next;
      pool->freeBlocks->next = pool->blocks;
      pool->blocks = pool->freeBlocks;
      pool->freeBlocks = tem;
      memcpy(pool->blocks->s, pool->start,
             (pool->end - pool->start) * sizeof(XML_Char));
      /* Rebase ptr before start is overwritten below. */
      pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
      pool->start = pool->blocks->s;
      pool->end = pool->start + pool->blocks->size;
      return XML_TRUE;
    }
  }
  if (pool->blocks && pool->start == pool->blocks->s) {
    /* Case 2: the string starts at the beginning of the newest block, so
       that block can be enlarged in place. */
    BLOCK *temp;
    int blockSize = (int)((unsigned)(pool->end - pool->start) * 2U);
    size_t bytesToAllocate;

    /* NOTE: Needs to be calculated prior to calling `realloc`
             to avoid dangling pointers: */
    const ptrdiff_t offsetInsideBlock = pool->ptr - pool->start;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX/2 bytes have already been allocated. This isn't
       * readily testable, since it is unlikely that an average
       * machine will have that much memory, so we exclude it from the
       * coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    temp = (BLOCK *)pool->mem->realloc_fcn(pool->blocks,
                                           (unsigned)bytesToAllocate);
    if (temp == NULL)
      return XML_FALSE; /* pool->blocks has not been overwritten */
    pool->blocks = temp;
    pool->blocks->size = blockSize;
    pool->ptr = pool->blocks->s + offsetInsideBlock;
    pool->start = pool->blocks->s;
    pool->end = pool->start + blockSize;
  } else {
    /* Case 3: allocate a fresh block and move the partial string into it. */
    BLOCK *tem;
    int blockSize = (int)(pool->end - pool->start);
    size_t bytesToAllocate;

    if (blockSize < 0) {
      /* This condition traps a situation where either more than
       * INT_MAX bytes have already been allocated (which is prevented
       * by various pieces of program logic, not least this one, never
       * mind the unlikelihood of actually having that much memory) or
       * the pool control fields have been corrupted (which could
       * conceivably happen in an extremely buggy user handler
       * function).  Either way it isn't readily testable, so we
       * exclude it from the coverage statistics.
       */
      return XML_FALSE; /* LCOV_EXCL_LINE */
    }

    if (blockSize < INIT_BLOCK_SIZE)
      blockSize = INIT_BLOCK_SIZE;
    else {
      /* Detect overflow, avoiding _signed_ overflow undefined behavior */
      if ((int)((unsigned)blockSize * 2U) < 0) {
        return XML_FALSE;
      }
      blockSize *= 2;
    }

    bytesToAllocate = poolBytesToAllocateFor(blockSize);
    if (bytesToAllocate == 0)
      return XML_FALSE;

    tem = pool->mem->malloc_fcn(bytesToAllocate);
    if (! tem)
      return XML_FALSE;
    tem->size = blockSize;
    tem->next = pool->blocks;
    pool->blocks = tem;
    /* Only the characters written so far (start..ptr) need to be copied. */
    if (pool->ptr != pool->start)
      memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
    pool->ptr = tem->s + (pool->ptr - pool->start);
    pool->start = tem->s;
    pool->end = tem->s + blockSize;
  }
  return XML_TRUE;
}
7837
/* Reserve the next node in the DTD's content scaffold (dtd->scaffold).
 *
 * On first use the level-index array dtd->scaffIndex is allocated with
 * parser->m_groupSize entries.  The scaffold array is created with
 * INIT_SCAFFOLD_ELEMENTS nodes and doubled whenever it is full.
 *
 * If dtd->scaffLevel is non-zero, the new node is linked in as the last
 * child of the node recorded in scaffIndex[scaffLevel - 1].  The new node's
 * own child and sibling links are reset to 0.
 *
 * Returns the index of the new node, or -1 on allocation failure or when a
 * size computation would overflow.
 */
static int FASTCALL
nextScaffoldPart(XML_Parser parser) {
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  CONTENT_SCAFFOLD *me;
  int next;

  if (! dtd->scaffIndex) {
    /* Detect and prevent integer overflow.
     * The preprocessor guard addresses the "always false" warning
     * from -Wtype-limits on platforms where
     * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
    if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) {
      return -1;
    }
#endif
    dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int));
    if (! dtd->scaffIndex)
      return -1;
    dtd->scaffIndex[0] = 0;
  }

  if (dtd->scaffCount >= dtd->scaffSize) {
    /* The scaffold array is full (or does not exist yet). */
    CONTENT_SCAFFOLD *temp;
    if (dtd->scaffold) {
      /* Detect and prevent integer overflow */
      if (dtd->scaffSize > UINT_MAX / 2u) {
        return -1;
      }
      /* Detect and prevent integer overflow.
       * The preprocessor guard addresses the "always false" warning
       * from -Wtype-limits on platforms where
       * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
      if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) {
        return -1;
      }
#endif

      temp = (CONTENT_SCAFFOLD *)REALLOC(
          parser, dtd->scaffold, dtd->scaffSize * 2 * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1; /* dtd->scaffold and dtd->scaffSize are left untouched */
      dtd->scaffSize *= 2;
    } else {
      temp = (CONTENT_SCAFFOLD *)MALLOC(parser, INIT_SCAFFOLD_ELEMENTS
                                                    * sizeof(CONTENT_SCAFFOLD));
      if (temp == NULL)
        return -1;
      dtd->scaffSize = INIT_SCAFFOLD_ELEMENTS;
    }
    dtd->scaffold = temp;
  }
  next = dtd->scaffCount++;
  me = &dtd->scaffold[next];
  if (dtd->scaffLevel) {
    /* Append the new node to the child list of its parent. */
    CONTENT_SCAFFOLD *parent
        = &dtd->scaffold[dtd->scaffIndex[dtd->scaffLevel - 1]];
    if (parent->lastchild) {
      dtd->scaffold[parent->lastchild].nextsib = next;
    }
    if (! parent->childcnt)
      parent->firstchild = next;
    parent->lastchild = next;
    parent->childcnt++;
  }
  me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0;
  return next;
}
7907
/* Build the XML_Content tree for the element declaration whose scaffold is
 * currently held in parser->m_dtd.
 *
 * Returns a single allocation obtained with MALLOC: dtd->scaffCount
 * XML_Content nodes followed by the name strings they refer to.
 * Returns NULL if the required size would overflow or the allocation fails.
 */
static XML_Content *
build_model(XML_Parser parser) {
  /* Function build_model transforms the existing parser->m_dtd->scaffold
   * array of CONTENT_SCAFFOLD tree nodes into a new array of
   * XML_Content tree nodes followed by a gapless list of zero-terminated
   * strings. */
  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
  XML_Content *ret;
  XML_Char *str; /* the current string writing location */

  /* Detect and prevent integer overflow.
   * The preprocessor guard addresses the "always false" warning
   * from -Wtype-limits on platforms where
   * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */
#if UINT_MAX >= SIZE_MAX
  if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) {
    return NULL;
  }
  if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) {
    return NULL;
  }
#endif
  /* The sum of both parts must fit into size_t as well. */
  if (dtd->scaffCount * sizeof(XML_Content)
      > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) {
    return NULL;
  }

  const size_t allocsize = (dtd->scaffCount * sizeof(XML_Content)
                            + (dtd->contentStringLen * sizeof(XML_Char)));

  ret = (XML_Content *)MALLOC(parser, allocsize);
  if (! ret)
    return NULL;

  /* What follows is an iterative implementation of what was previously done
   * recursively in a dedicated function called "build_node".  The old
   * recursive build_node could be forced into stack exhaustion from input as
   * small as a few megabytes, and so that was a security issue.  Hence, a
   * function call stack is avoided now by resolving recursion.
   *
   * The iterative approach works as follows:
   *
   * - We have two writing pointers, both walking up the result array; one
   *   does the work, the other creates "jobs" for its colleague to do, and
   *   leads the way:
   *
   *   - The faster one, pointer jobDest, always leads and writes "what job
   *     to do" by the other, once they reach that place in the
   *     array: leader "jobDest" stores the source node array index (relative
   *     to array dtd->scaffold) in field "numchildren".
   *
   *   - The slower one, pointer dest, looks at the value stored in the
   *     "numchildren" field (which actually holds a source node array index
   *     at that time) and puts the real data from dtd->scaffold in.
   *
   * - Before the loop starts, jobDest writes source array index 0
   *   (where the root node is located) so that dest will have something to
   *   do when it starts operation.
   *
   * - Whenever nodes with children are encountered, jobDest appends
   *   them as new jobs, in order.  As a result, tree node siblings are
   *   adjacent in the resulting array, for example:
   *
   *     [0] root, has two children
   *       [1] first child of 0, has three children
   *         [3] first child of 1, does not have children
   *         [4] second child of 1, does not have children
   *         [5] third child of 1, does not have children
   *       [2] second child of 0, does not have children
   *
   *   Or (the same data) presented in flat array view:
   *
   *     [0] root, has two children
   *
   *     [1] first child of 0, has three children
   *     [2] second child of 0, does not have children
   *
   *     [3] first child of 1, does not have children
   *     [4] second child of 1, does not have children
   *     [5] third child of 1, does not have children
   *
   * - The algorithm repeats until all target array indices have been
   *   processed.
   */
  XML_Content *dest = ret; /* tree node writing location, moves upwards */
  XML_Content *const destLimit = &ret[dtd->scaffCount];
  XML_Content *jobDest = ret; /* next free writing location in target array */
  /* The strings are stored right behind the last node. */
  str = (XML_Char *)&ret[dtd->scaffCount];

  /* Add the starting job, the root node (index 0) of the source tree */
  (jobDest++)->numchildren = 0;

  for (; dest < destLimit; dest++) {
    /* Retrieve source tree array index from job storage */
    const int src_node = (int)dest->numchildren;

    /* Convert item */
    dest->type = dtd->scaffold[src_node].type;
    dest->quant = dtd->scaffold[src_node].quant;
    if (dest->type == XML_CTYPE_NAME) {
      /* Name node: copy the name, terminator included, into the string
         area; such a node has no children. */
      const XML_Char *src;
      dest->name = str;
      src = dtd->scaffold[src_node].name;
      for (;;) {
        *str++ = *src;
        if (! *src)
          break;
        src++;
      }
      dest->numchildren = 0;
      dest->children = NULL;
    } else {
      unsigned int i;
      int cn;
      dest->name = NULL;
      /* From here on numchildren holds the real child count, no longer the
         job's source index. */
      dest->numchildren = dtd->scaffold[src_node].childcnt;
      dest->children = jobDest;

      /* Append scaffold indices of children to array */
      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib)
        (jobDest++)->numchildren = (unsigned int)cn;
    }
  }

  return ret;
}
8034
8035 static ELEMENT_TYPE *
8036 getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
8037 const char *end) {
8038 DTD *const dtd = parser->m_dtd; /* save one level of indirection */
8039 const XML_Char *name = poolStoreString(&dtd->pool, enc, ptr, end);
8040 ELEMENT_TYPE *ret;
8041
8042 if (! name)
8043 return NULL;
8044 ret = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name,
8045 sizeof(ELEMENT_TYPE));
8046 if (! ret)
8047 return NULL;
8048 if (ret->name != name)
8049 poolDiscard(&dtd->pool);
8050 else {
8051 poolFinish(&dtd->pool);
8052 if (! setElementTypePrefix(parser, ret))
8053 return NULL;
8054 }
8055 return ret;
8056 }
8057
8058 static XML_Char *
8059 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
8060 size_t charsRequired = 0;
8061 XML_Char *result;
8062
8063 /* First determine how long the string is */
8064 while (s[charsRequired] != 0) {
8065 charsRequired++;
8066 }
8067 /* Include the terminator */
8068 charsRequired++;
8069
8070 /* Now allocate space for the copy */
8071 result = memsuite->malloc_fcn(charsRequired * sizeof(XML_Char));
8072 if (result == NULL)
8073 return NULL;
8074 /* Copy the original into place */
8075 memcpy(result, s, charsRequired * sizeof(XML_Char));
8076 return result;
8077 }
8078
8079 #if XML_GE == 1
8080
8081 static float
8082 accountingGetCurrentAmplification(XML_Parser rootParser) {
8083 // 1.........1.........12 => 22
8084 const size_t lenOfShortestInclude = sizeof("<!ENTITY a SYSTEM 'b'>") - 1;
8085 const XmlBigCount countBytesOutput
8086 = rootParser->m_accounting.countBytesDirect
8087 + rootParser->m_accounting.countBytesIndirect;
8088 const float amplificationFactor
8089 = rootParser->m_accounting.countBytesDirect
8090 ? (countBytesOutput
8091 / (float)(rootParser->m_accounting.countBytesDirect))
8092 : ((lenOfShortestInclude
8093 + rootParser->m_accounting.countBytesIndirect)
8094 / (float)lenOfShortestInclude);
8095 assert(! rootParser->m_parentParser);
8096 return amplificationFactor;
8097 }
8098
8099 static void
8100 accountingReportStats(XML_Parser originParser, const char *epilog) {
8101 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8102 assert(! rootParser->m_parentParser);
8103
8104 if (rootParser->m_accounting.debugLevel == 0u) {
8105 return;
8106 }
8107
8108 const float amplificationFactor
8109 = accountingGetCurrentAmplification(rootParser);
8110 fprintf(stderr,
8111 "expat: Accounting(%p): Direct " EXPAT_FMT_ULL(
8112 "10") ", indirect " EXPAT_FMT_ULL("10") ", amplification %8.2f%s",
8113 (void *)rootParser, rootParser->m_accounting.countBytesDirect,
8114 rootParser->m_accounting.countBytesIndirect,
8115 (double)amplificationFactor, epilog);
8116 }
8117
8118 static void
8119 accountingOnAbort(XML_Parser originParser) {
8120 accountingReportStats(originParser, " ABORTING\n");
8121 }
8122
8123 static void
8124 accountingReportDiff(XML_Parser rootParser,
8125 unsigned int levelsAwayFromRootParser, const char *before,
8126 const char *after, ptrdiff_t bytesMore, int source_line,
8127 enum XML_Account account) {
8128 assert(! rootParser->m_parentParser);
8129
8130 fprintf(stderr,
8131 " (+" EXPAT_FMT_PTRDIFF_T("6") " bytes %s|%u, xmlparse.c:%d) %*s\"",
8132 bytesMore, (account == XML_ACCOUNT_DIRECT) ? "DIR" : "EXP",
8133 levelsAwayFromRootParser, source_line, 10, "");
8134
8135 const char ellipis[] = "[..]";
8136 const size_t ellipsisLength = sizeof(ellipis) /* because compile-time */ - 1;
8137 const unsigned int contextLength = 10;
8138
8139 /* Note: Performance is of no concern here */
8140 const char *walker = before;
8141 if ((rootParser->m_accounting.debugLevel >= 3u)
8142 || (after - before)
8143 <= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
8144 for (; walker < after; walker++) {
8145 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8146 }
8147 } else {
8148 for (; walker < before + contextLength; walker++) {
8149 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8150 }
8151 fprintf(stderr, ellipis);
8152 walker = after - contextLength;
8153 for (; walker < after; walker++) {
8154 fprintf(stderr, "%s", unsignedCharToPrintable(walker[0]));
8155 }
8156 }
8157 fprintf(stderr, "\"\n");
8158 }
8159
8160 static XML_Bool
8161 accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
8162 const char *after, int source_line,
8163 enum XML_Account account) {
8164 /* Note: We need to check the token type *first* to be sure that
8165 * we can even access variable <after>, safely.
8166 * E.g. for XML_TOK_NONE <after> may hold an invalid pointer. */
8167 switch (tok) {
8168 case XML_TOK_INVALID:
8169 case XML_TOK_PARTIAL:
8170 case XML_TOK_PARTIAL_CHAR:
8171 case XML_TOK_NONE:
8172 return XML_TRUE;
8173 }
8174
8175 if (account == XML_ACCOUNT_NONE)
8176 return XML_TRUE; /* because these bytes have been accounted for, already */
8177
8178 unsigned int levelsAwayFromRootParser;
8179 const XML_Parser rootParser
8180 = getRootParserOf(originParser, &levelsAwayFromRootParser);
8181 assert(! rootParser->m_parentParser);
8182
8183 const int isDirect
8184 = (account == XML_ACCOUNT_DIRECT) && (originParser == rootParser);
8185 const ptrdiff_t bytesMore = after - before;
8186
8187 XmlBigCount *const additionTarget
8188 = isDirect ? &rootParser->m_accounting.countBytesDirect
8189 : &rootParser->m_accounting.countBytesIndirect;
8190
8191 /* Detect and avoid integer overflow */
8192 if (*additionTarget > (XmlBigCount)(-1) - (XmlBigCount)bytesMore)
8193 return XML_FALSE;
8194 *additionTarget += bytesMore;
8195
8196 const XmlBigCount countBytesOutput
8197 = rootParser->m_accounting.countBytesDirect
8198 + rootParser->m_accounting.countBytesIndirect;
8199 const float amplificationFactor
8200 = accountingGetCurrentAmplification(rootParser);
8201 const XML_Bool tolerated
8202 = (countBytesOutput < rootParser->m_accounting.activationThresholdBytes)
8203 || (amplificationFactor
8204 <= rootParser->m_accounting.maximumAmplificationFactor);
8205
8206 if (rootParser->m_accounting.debugLevel >= 2u) {
8207 accountingReportStats(rootParser, "");
8208 accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
8209 bytesMore, source_line, account);
8210 }
8211
8212 return tolerated;
8213 }
8214
8215 unsigned long long
8216 testingAccountingGetCountBytesDirect(XML_Parser parser) {
8217 if (! parser)
8218 return 0;
8219 return parser->m_accounting.countBytesDirect;
8220 }
8221
8222 unsigned long long
8223 testingAccountingGetCountBytesIndirect(XML_Parser parser) {
8224 if (! parser)
8225 return 0;
8226 return parser->m_accounting.countBytesIndirect;
8227 }
8228
8229 static void
8230 entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
8231 const char *action, int sourceLine) {
8232 assert(! rootParser->m_parentParser);
8233 if (rootParser->m_entity_stats.debugLevel == 0u)
8234 return;
8235
8236 # if defined(XML_UNICODE)
8237 const char *const entityName = "[..]";
8238 # else
8239 const char *const entityName = entity->name;
8240 # endif
8241
8242 fprintf(
8243 stderr,
8244 "expat: Entities(%p): Count %9u, depth %2u/%2u %*s%s%s; %s length %d (xmlparse.c:%d)\n",
8245 (void *)rootParser, rootParser->m_entity_stats.countEverOpened,
8246 rootParser->m_entity_stats.currentDepth,
8247 rootParser->m_entity_stats.maximumDepthSeen,
8248 (rootParser->m_entity_stats.currentDepth - 1) * 2, "",
8249 entity->is_param ? "%" : "&", entityName, action, entity->textLen,
8250 sourceLine);
8251 }
8252
8253 static void
8254 entityTrackingOnOpen(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8255 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8256 assert(! rootParser->m_parentParser);
8257
8258 rootParser->m_entity_stats.countEverOpened++;
8259 rootParser->m_entity_stats.currentDepth++;
8260 if (rootParser->m_entity_stats.currentDepth
8261 > rootParser->m_entity_stats.maximumDepthSeen) {
8262 rootParser->m_entity_stats.maximumDepthSeen++;
8263 }
8264
8265 entityTrackingReportStats(rootParser, entity, "OPEN ", sourceLine);
8266 }
8267
8268 static void
8269 entityTrackingOnClose(XML_Parser originParser, ENTITY *entity, int sourceLine) {
8270 const XML_Parser rootParser = getRootParserOf(originParser, NULL);
8271 assert(! rootParser->m_parentParser);
8272
8273 entityTrackingReportStats(rootParser, entity, "CLOSE", sourceLine);
8274 rootParser->m_entity_stats.currentDepth--;
8275 }
8276
8277 static XML_Parser
8278 getRootParserOf(XML_Parser parser, unsigned int *outLevelDiff) {
8279 XML_Parser rootParser = parser;
8280 unsigned int stepsTakenUpwards = 0;
8281 while (rootParser->m_parentParser) {
8282 rootParser = rootParser->m_parentParser;
8283 stepsTakenUpwards++;
8284 }
8285 assert(! rootParser->m_parentParser);
8286 if (outLevelDiff != NULL) {
8287 *outLevelDiff = stepsTakenUpwards;
8288 }
8289 return rootParser;
8290 }
8291
8292 const char *
8293 unsignedCharToPrintable(unsigned char c) {
8294 switch (c) {
8295 case 0:
8296 return "\\0";
8297 case 1:
8298 return "\\x1";
8299 case 2:
8300 return "\\x2";
8301 case 3:
8302 return "\\x3";
8303 case 4:
8304 return "\\x4";
8305 case 5:
8306 return "\\x5";
8307 case 6:
8308 return "\\x6";
8309 case 7:
8310 return "\\x7";
8311 case 8:
8312 return "\\x8";
8313 case 9:
8314 return "\\t";
8315 case 10:
8316 return "\\n";
8317 case 11:
8318 return "\\xB";
8319 case 12:
8320 return "\\xC";
8321 case 13:
8322 return "\\r";
8323 case 14:
8324 return "\\xE";
8325 case 15:
8326 return "\\xF";
8327 case 16:
8328 return "\\x10";
8329 case 17:
8330 return "\\x11";
8331 case 18:
8332 return "\\x12";
8333 case 19:
8334 return "\\x13";
8335 case 20:
8336 return "\\x14";
8337 case 21:
8338 return "\\x15";
8339 case 22:
8340 return "\\x16";
8341 case 23:
8342 return "\\x17";
8343 case 24:
8344 return "\\x18";
8345 case 25:
8346 return "\\x19";
8347 case 26:
8348 return "\\x1A";
8349 case 27:
8350 return "\\x1B";
8351 case 28:
8352 return "\\x1C";
8353 case 29:
8354 return "\\x1D";
8355 case 30:
8356 return "\\x1E";
8357 case 31:
8358 return "\\x1F";
8359 case 32:
8360 return " ";
8361 case 33:
8362 return "!";
8363 case 34:
8364 return "\\\"";
8365 case 35:
8366 return "#";
8367 case 36:
8368 return "$";
8369 case 37:
8370 return "%";
8371 case 38:
8372 return "&";
8373 case 39:
8374 return "'";
8375 case 40:
8376 return "(";
8377 case 41:
8378 return ")";
8379 case 42:
8380 return "*";
8381 case 43:
8382 return "+";
8383 case 44:
8384 return ",";
8385 case 45:
8386 return "-";
8387 case 46:
8388 return ".";
8389 case 47:
8390 return "/";
8391 case 48:
8392 return "0";
8393 case 49:
8394 return "1";
8395 case 50:
8396 return "2";
8397 case 51:
8398 return "3";
8399 case 52:
8400 return "4";
8401 case 53:
8402 return "5";
8403 case 54:
8404 return "6";
8405 case 55:
8406 return "7";
8407 case 56:
8408 return "8";
8409 case 57:
8410 return "9";
8411 case 58:
8412 return ":";
8413 case 59:
8414 return ";";
8415 case 60:
8416 return "<";
8417 case 61:
8418 return "=";
8419 case 62:
8420 return ">";
8421 case 63:
8422 return "?";
8423 case 64:
8424 return "@";
8425 case 65:
8426 return "A";
8427 case 66:
8428 return "B";
8429 case 67:
8430 return "C";
8431 case 68:
8432 return "D";
8433 case 69:
8434 return "E";
8435 case 70:
8436 return "F";
8437 case 71:
8438 return "G";
8439 case 72:
8440 return "H";
8441 case 73:
8442 return "I";
8443 case 74:
8444 return "J";
8445 case 75:
8446 return "K";
8447 case 76:
8448 return "L";
8449 case 77:
8450 return "M";
8451 case 78:
8452 return "N";
8453 case 79:
8454 return "O";
8455 case 80:
8456 return "P";
8457 case 81:
8458 return "Q";
8459 case 82:
8460 return "R";
8461 case 83:
8462 return "S";
8463 case 84:
8464 return "T";
8465 case 85:
8466 return "U";
8467 case 86:
8468 return "V";
8469 case 87:
8470 return "W";
8471 case 88:
8472 return "X";
8473 case 89:
8474 return "Y";
8475 case 90:
8476 return "Z";
8477 case 91:
8478 return "[";
8479 case 92:
8480 return "\\\\";
8481 case 93:
8482 return "]";
8483 case 94:
8484 return "^";
8485 case 95:
8486 return "_";
8487 case 96:
8488 return "`";
8489 case 97:
8490 return "a";
8491 case 98:
8492 return "b";
8493 case 99:
8494 return "c";
8495 case 100:
8496 return "d";
8497 case 101:
8498 return "e";
8499 case 102:
8500 return "f";
8501 case 103:
8502 return "g";
8503 case 104:
8504 return "h";
8505 case 105:
8506 return "i";
8507 case 106:
8508 return "j";
8509 case 107:
8510 return "k";
8511 case 108:
8512 return "l";
8513 case 109:
8514 return "m";
8515 case 110:
8516 return "n";
8517 case 111:
8518 return "o";
8519 case 112:
8520 return "p";
8521 case 113:
8522 return "q";
8523 case 114:
8524 return "r";
8525 case 115:
8526 return "s";
8527 case 116:
8528 return "t";
8529 case 117:
8530 return "u";
8531 case 118:
8532 return "v";
8533 case 119:
8534 return "w";
8535 case 120:
8536 return "x";
8537 case 121:
8538 return "y";
8539 case 122:
8540 return "z";
8541 case 123:
8542 return "{";
8543 case 124:
8544 return "|";
8545 case 125:
8546 return "}";
8547 case 126:
8548 return "~";
8549 case 127:
8550 return "\\x7F";
8551 case 128:
8552 return "\\x80";
8553 case 129:
8554 return "\\x81";
8555 case 130:
8556 return "\\x82";
8557 case 131:
8558 return "\\x83";
8559 case 132:
8560 return "\\x84";
8561 case 133:
8562 return "\\x85";
8563 case 134:
8564 return "\\x86";
8565 case 135:
8566 return "\\x87";
8567 case 136:
8568 return "\\x88";
8569 case 137:
8570 return "\\x89";
8571 case 138:
8572 return "\\x8A";
8573 case 139:
8574 return "\\x8B";
8575 case 140:
8576 return "\\x8C";
8577 case 141:
8578 return "\\x8D";
8579 case 142:
8580 return "\\x8E";
8581 case 143:
8582 return "\\x8F";
8583 case 144:
8584 return "\\x90";
8585 case 145:
8586 return "\\x91";
8587 case 146:
8588 return "\\x92";
8589 case 147:
8590 return "\\x93";
8591 case 148:
8592 return "\\x94";
8593 case 149:
8594 return "\\x95";
8595 case 150:
8596 return "\\x96";
8597 case 151:
8598 return "\\x97";
8599 case 152:
8600 return "\\x98";
8601 case 153:
8602 return "\\x99";
8603 case 154:
8604 return "\\x9A";
8605 case 155:
8606 return "\\x9B";
8607 case 156:
8608 return "\\x9C";
8609 case 157:
8610 return "\\x9D";
8611 case 158:
8612 return "\\x9E";
8613 case 159:
8614 return "\\x9F";
8615 case 160:
8616 return "\\xA0";
8617 case 161:
8618 return "\\xA1";
8619 case 162:
8620 return "\\xA2";
8621 case 163:
8622 return "\\xA3";
8623 case 164:
8624 return "\\xA4";
8625 case 165:
8626 return "\\xA5";
8627 case 166:
8628 return "\\xA6";
8629 case 167:
8630 return "\\xA7";
8631 case 168:
8632 return "\\xA8";
8633 case 169:
8634 return "\\xA9";
8635 case 170:
8636 return "\\xAA";
8637 case 171:
8638 return "\\xAB";
8639 case 172:
8640 return "\\xAC";
8641 case 173:
8642 return "\\xAD";
8643 case 174:
8644 return "\\xAE";
8645 case 175:
8646 return "\\xAF";
8647 case 176:
8648 return "\\xB0";
8649 case 177:
8650 return "\\xB1";
8651 case 178:
8652 return "\\xB2";
8653 case 179:
8654 return "\\xB3";
8655 case 180:
8656 return "\\xB4";
8657 case 181:
8658 return "\\xB5";
8659 case 182:
8660 return "\\xB6";
8661 case 183:
8662 return "\\xB7";
8663 case 184:
8664 return "\\xB8";
8665 case 185:
8666 return "\\xB9";
8667 case 186:
8668 return "\\xBA";
8669 case 187:
8670 return "\\xBB";
8671 case 188:
8672 return "\\xBC";
8673 case 189:
8674 return "\\xBD";
8675 case 190:
8676 return "\\xBE";
8677 case 191:
8678 return "\\xBF";
8679 case 192:
8680 return "\\xC0";
8681 case 193:
8682 return "\\xC1";
8683 case 194:
8684 return "\\xC2";
8685 case 195:
8686 return "\\xC3";
8687 case 196:
8688 return "\\xC4";
8689 case 197:
8690 return "\\xC5";
8691 case 198:
8692 return "\\xC6";
8693 case 199:
8694 return "\\xC7";
8695 case 200:
8696 return "\\xC8";
8697 case 201:
8698 return "\\xC9";
8699 case 202:
8700 return "\\xCA";
8701 case 203:
8702 return "\\xCB";
8703 case 204:
8704 return "\\xCC";
8705 case 205:
8706 return "\\xCD";
8707 case 206:
8708 return "\\xCE";
8709 case 207:
8710 return "\\xCF";
8711 case 208:
8712 return "\\xD0";
8713 case 209:
8714 return "\\xD1";
8715 case 210:
8716 return "\\xD2";
8717 case 211:
8718 return "\\xD3";
8719 case 212:
8720 return "\\xD4";
8721 case 213:
8722 return "\\xD5";
8723 case 214:
8724 return "\\xD6";
8725 case 215:
8726 return "\\xD7";
8727 case 216:
8728 return "\\xD8";
8729 case 217:
8730 return "\\xD9";
8731 case 218:
8732 return "\\xDA";
8733 case 219:
8734 return "\\xDB";
8735 case 220:
8736 return "\\xDC";
8737 case 221:
8738 return "\\xDD";
8739 case 222:
8740 return "\\xDE";
8741 case 223:
8742 return "\\xDF";
8743 case 224:
8744 return "\\xE0";
8745 case 225:
8746 return "\\xE1";
8747 case 226:
8748 return "\\xE2";
8749 case 227:
8750 return "\\xE3";
8751 case 228:
8752 return "\\xE4";
8753 case 229:
8754 return "\\xE5";
8755 case 230:
8756 return "\\xE6";
8757 case 231:
8758 return "\\xE7";
8759 case 232:
8760 return "\\xE8";
8761 case 233:
8762 return "\\xE9";
8763 case 234:
8764 return "\\xEA";
8765 case 235:
8766 return "\\xEB";
8767 case 236:
8768 return "\\xEC";
8769 case 237:
8770 return "\\xED";
8771 case 238:
8772 return "\\xEE";
8773 case 239:
8774 return "\\xEF";
8775 case 240:
8776 return "\\xF0";
8777 case 241:
8778 return "\\xF1";
8779 case 242:
8780 return "\\xF2";
8781 case 243:
8782 return "\\xF3";
8783 case 244:
8784 return "\\xF4";
8785 case 245:
8786 return "\\xF5";
8787 case 246:
8788 return "\\xF6";
8789 case 247:
8790 return "\\xF7";
8791 case 248:
8792 return "\\xF8";
8793 case 249:
8794 return "\\xF9";
8795 case 250:
8796 return "\\xFA";
8797 case 251:
8798 return "\\xFB";
8799 case 252:
8800 return "\\xFC";
8801 case 253:
8802 return "\\xFD";
8803 case 254:
8804 return "\\xFE";
8805 case 255:
8806 return "\\xFF";
8807 // LCOV_EXCL_START
8808 default:
8809 assert(0); /* never gets here */
8810 return "dead code";
8811 }
8812 assert(0); /* never gets here */
8813 // LCOV_EXCL_STOP
8814 }
8815
8816 #endif /* XML_GE == 1 */
8817
/* Reads a debug level from the environment variable named variableName.

   The value is accepted only if it consists solely of decimal digits and
   fits into an unsigned long.  In every other case (variable not set,
   empty value, leading whitespace or sign, trailing garbage, out-of-range
   number) defaultDebugLevel is returned instead.

   The caller's errno is left unchanged.
*/
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
  const char *const value = getenv(variableName);
  if (value == NULL) {
    return defaultDebugLevel;
  }

  /* strtoul skips leading whitespace and accepts an optional sign; a
     negative input such as "-1" is negated in the unsigned type and comes
     back as ULONG_MAX without any error being reported.  Insisting on a
     leading digit rules that out (and rejects the empty string, too). */
  if ((value[0] < '0') || (value[0] > '9')) {
    return defaultDebugLevel;
  }

  /* errno is the only way to detect overflow in strtoul, so it has to be
     cleared first; remember the previous value to hand it back untouched. */
  const int savedErrno = errno;
  errno = 0;

  char *afterValue = NULL;
  const unsigned long debugLevel = strtoul(value, &afterValue, 10);
  const int rejected
      = (errno != 0) || (afterValue == value) || (afterValue[0] != '\0');

  errno = savedErrno;

  return rejected ? defaultDebugLevel : debugLevel;
}
8836