1 1.1 wiz /* Commonly used functions for the Expat test suite 2 1.1 wiz __ __ _ 3 1.1 wiz ___\ \/ /_ __ __ _| |_ 4 1.1 wiz / _ \\ /| '_ \ / _` | __| 5 1.1 wiz | __// \| |_) | (_| | |_ 6 1.1 wiz \___/_/\_\ .__/ \__,_|\__| 7 1.1 wiz |_| XML parser 8 1.1 wiz 9 1.1 wiz Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net> 10 1.1 wiz Copyright (c) 2003 Greg Stein <gstein (at) users.sourceforge.net> 11 1.1 wiz Copyright (c) 2005-2007 Steven Solie <steven (at) solie.ca> 12 1.1 wiz Copyright (c) 2005-2012 Karl Waclawek <karl (at) waclawek.net> 13 1.1.1.2 wiz Copyright (c) 2016-2025 Sebastian Pipping <sebastian (at) pipping.org> 14 1.1 wiz Copyright (c) 2017-2022 Rhodri James <rhodri (at) wildebeest.org.uk> 15 1.1 wiz Copyright (c) 2017 Joe Orton <jorton (at) redhat.com> 16 1.1 wiz Copyright (c) 2017 Jos Gutirrez de la Concha <jose (at) zeroc.com> 17 1.1 wiz Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu (at) poste.it> 18 1.1 wiz Copyright (c) 2019 David Loffredo <loffredo (at) steptools.com> 19 1.1 wiz Copyright (c) 2020 Tim Gates <tim.gates (at) iress.com> 20 1.1 wiz Copyright (c) 2021 Donghee Na <donghee.na (at) python.org> 21 1.1 wiz Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild (at) sony.com> 22 1.1 wiz Licensed under the MIT license: 23 1.1 wiz 24 1.1 wiz Permission is hereby granted, free of charge, to any person obtaining 25 1.1 wiz a copy of this software and associated documentation files (the 26 1.1 wiz "Software"), to deal in the Software without restriction, including 27 1.1 wiz without limitation the rights to use, copy, modify, merge, publish, 28 1.1 wiz distribute, sublicense, and/or sell copies of the Software, and to permit 29 1.1 wiz persons to whom the Software is furnished to do so, subject to the 30 1.1 wiz following conditions: 31 1.1 wiz 32 1.1 wiz The above copyright notice and this permission notice shall be included 33 1.1 wiz in all copies or substantial portions of the Software. 
34 1.1 wiz 35 1.1 wiz THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36 1.1 wiz EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37 1.1 wiz MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 38 1.1 wiz NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39 1.1 wiz DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40 1.1 wiz OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41 1.1 wiz USE OR OTHER DEALINGS IN THE SOFTWARE. 42 1.1 wiz */ 43 1.1 wiz 44 1.1 wiz #include <assert.h> 45 1.1.1.2 wiz #include <errno.h> 46 1.1.1.2 wiz #include <stdint.h> // for SIZE_MAX 47 1.1 wiz #include <stdio.h> 48 1.1 wiz #include <string.h> 49 1.1 wiz 50 1.1 wiz #include "expat_config.h" 51 1.1 wiz #include "expat.h" 52 1.1 wiz #include "internal.h" 53 1.1 wiz #include "chardata.h" 54 1.1 wiz #include "minicheck.h" 55 1.1 wiz #include "common.h" 56 1.1 wiz #include "handlers.h" 57 1.1 wiz 58 1.1 wiz /* Common test data */ 59 1.1 wiz 60 1.1 wiz const char *long_character_data_text 61 1.1 wiz = "<?xml version='1.0' encoding='iso-8859-1'?><s>" 62 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 63 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 64 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 65 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 66 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 67 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 68 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 69 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 70 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 71 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 72 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 73 1.1 wiz 
"012345678901234567890123456789012345678901234567890123456789" 74 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 75 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 76 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 77 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 78 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 79 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 80 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 81 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 82 1.1 wiz "</s>"; 83 1.1 wiz 84 1.1 wiz const char *long_cdata_text 85 1.1 wiz = "<s><![CDATA[" 86 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 87 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 88 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 89 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 90 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 91 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 92 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 93 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 94 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 95 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 96 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 97 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 98 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 99 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 100 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 101 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 102 1.1 wiz 
"012345678901234567890123456789012345678901234567890123456789" 103 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 104 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 105 1.1 wiz "012345678901234567890123456789012345678901234567890123456789" 106 1.1 wiz "]]></s>"; 107 1.1 wiz 108 1.1 wiz /* Having an element name longer than 1024 characters exercises some 109 1.1 wiz * of the pool allocation code in the parser that otherwise does not 110 1.1 wiz * get executed. The count at the end of the line is the number of 111 1.1 wiz * characters (bytes) in the element name by that point.x 112 1.1 wiz */ 113 1.1 wiz const char *get_buffer_test_text 114 1.1 wiz = "<documentwitharidiculouslylongelementnametotease" /* 0x030 */ 115 1.1 wiz "aparticularcorneroftheallocationinXML_GetBuffers" /* 0x060 */ 116 1.1 wiz "othatwecanimprovethecoverageyetagain012345678901" /* 0x090 */ 117 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0c0 */ 118 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x0f0 */ 119 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x120 */ 120 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x150 */ 121 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x180 */ 122 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1b0 */ 123 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x1e0 */ 124 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x210 */ 125 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x240 */ 126 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x270 */ 127 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2a0 */ 128 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x2d0 */ 129 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x300 */ 130 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x330 */ 131 1.1 wiz 
"123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x360 */ 132 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x390 */ 133 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3c0 */ 134 1.1 wiz "123456789abcdef0123456789abcdef0123456789abcdef0" /* 0x3f0 */ 135 1.1 wiz "123456789abcdef0123456789abcdef0123456789>\n<ef0"; /* 0x420 */ 136 1.1 wiz 137 1.1 wiz /* Test control globals */ 138 1.1 wiz 139 1.1 wiz /* Used as the "resumable" parameter to XML_StopParser by some tests */ 140 1.1 wiz XML_Bool g_resumable = XML_FALSE; 141 1.1 wiz 142 1.1 wiz /* Used to control abort checks in some tests */ 143 1.1 wiz XML_Bool g_abortable = XML_FALSE; 144 1.1 wiz 145 1.1 wiz /* Used to control _XML_Parse_SINGLE_BYTES() chunk size */ 146 1.1 wiz int g_chunkSize = 1; 147 1.1 wiz 148 1.1 wiz /* Common test functions */ 149 1.1 wiz 150 1.1 wiz void 151 1.1 wiz tcase_add_test__ifdef_xml_dtd(TCase *tc, tcase_test_function test) { 152 1.1 wiz #ifdef XML_DTD 153 1.1 wiz tcase_add_test(tc, test); 154 1.1 wiz #else 155 1.1 wiz UNUSED_P(tc); 156 1.1 wiz UNUSED_P(test); 157 1.1 wiz #endif 158 1.1 wiz } 159 1.1 wiz 160 1.1 wiz void 161 1.1 wiz tcase_add_test__if_xml_ge(TCase *tc, tcase_test_function test) { 162 1.1 wiz #if XML_GE == 1 163 1.1 wiz tcase_add_test(tc, test); 164 1.1 wiz #else 165 1.1 wiz UNUSED_P(tc); 166 1.1 wiz UNUSED_P(test); 167 1.1 wiz #endif 168 1.1 wiz } 169 1.1 wiz 170 1.1 wiz void 171 1.1 wiz basic_teardown(void) { 172 1.1 wiz if (g_parser != NULL) { 173 1.1 wiz XML_ParserFree(g_parser); 174 1.1 wiz g_parser = NULL; 175 1.1 wiz } 176 1.1 wiz } 177 1.1 wiz 178 1.1 wiz /* Generate a failure using the parser state to create an error message; 179 1.1 wiz this should be used when the parser reports an error we weren't 180 1.1 wiz expecting. 
181 1.1 wiz */ 182 1.1 wiz void 183 1.1 wiz _xml_failure(XML_Parser parser, const char *file, int line) { 184 1.1 wiz char buffer[1024]; 185 1.1 wiz enum XML_Error err = XML_GetErrorCode(parser); 186 1.1 wiz snprintf(buffer, sizeof(buffer), 187 1.1 wiz " %d: %" XML_FMT_STR " (line %" XML_FMT_INT_MOD 188 1.1 wiz "u, offset %" XML_FMT_INT_MOD "u)\n reported from %s, line %d\n", 189 1.1 wiz err, XML_ErrorString(err), XML_GetCurrentLineNumber(parser), 190 1.1 wiz XML_GetCurrentColumnNumber(parser), file, line); 191 1.1 wiz _fail(file, line, buffer); 192 1.1 wiz } 193 1.1 wiz 194 1.1 wiz enum XML_Status 195 1.1 wiz _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, 196 1.1 wiz int isFinal) { 197 1.1 wiz // This ensures that tests have to run pathological parse cases 198 1.1 wiz // (e.g. when `s` is NULL) against plain XML_Parse rather than 199 1.1 wiz // chunking _XML_Parse_SINGLE_BYTES. 200 1.1 wiz assert((parser != NULL) && (s != NULL) && (len >= 0)); 201 1.1 wiz const int chunksize = g_chunkSize; 202 1.1 wiz if (chunksize > 0) { 203 1.1 wiz // parse in chunks of `chunksize` bytes as long as not exhausting 204 1.1 wiz for (; len > chunksize; len -= chunksize, s += chunksize) { 205 1.1 wiz enum XML_Status res = XML_Parse(parser, s, chunksize, XML_FALSE); 206 1.1 wiz if (res != XML_STATUS_OK) { 207 1.1 wiz if ((res == XML_STATUS_SUSPENDED) && (len > chunksize)) { 208 1.1 wiz fail("Use of function _XML_Parse_SINGLE_BYTES with a chunk size " 209 1.1 wiz "greater than 0 (from g_chunkSize) does not work well with " 210 1.1 wiz "suspension. 
Please consider use of plain XML_Parse at this " 211 1.1 wiz "place in your test, instead."); 212 1.1 wiz } 213 1.1 wiz return res; 214 1.1 wiz } 215 1.1 wiz } 216 1.1 wiz } 217 1.1 wiz // parse the final chunk, the size of which will be <= chunksize 218 1.1 wiz return XML_Parse(parser, s, len, isFinal); 219 1.1 wiz } 220 1.1 wiz 221 1.1 wiz void 222 1.1 wiz _expect_failure(const char *text, enum XML_Error errorCode, 223 1.1 wiz const char *errorMessage, const char *file, int lineno) { 224 1.1 wiz if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 225 1.1 wiz == XML_STATUS_OK) 226 1.1 wiz /* Hackish use of _fail() macro, but lets us report 227 1.1 wiz the right filename and line number. */ 228 1.1 wiz _fail(file, lineno, errorMessage); 229 1.1 wiz if (XML_GetErrorCode(g_parser) != errorCode) 230 1.1 wiz _xml_failure(g_parser, file, lineno); 231 1.1 wiz } 232 1.1 wiz 233 1.1 wiz void 234 1.1 wiz _run_character_check(const char *text, const XML_Char *expected, 235 1.1 wiz const char *file, int line) { 236 1.1 wiz CharData storage; 237 1.1 wiz 238 1.1 wiz CharData_Init(&storage); 239 1.1 wiz XML_SetUserData(g_parser, &storage); 240 1.1 wiz XML_SetCharacterDataHandler(g_parser, accumulate_characters); 241 1.1 wiz if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 242 1.1 wiz == XML_STATUS_ERROR) 243 1.1 wiz _xml_failure(g_parser, file, line); 244 1.1 wiz CharData_CheckXMLChars(&storage, expected); 245 1.1 wiz } 246 1.1 wiz 247 1.1 wiz void 248 1.1 wiz _run_attribute_check(const char *text, const XML_Char *expected, 249 1.1 wiz const char *file, int line) { 250 1.1 wiz CharData storage; 251 1.1 wiz 252 1.1 wiz CharData_Init(&storage); 253 1.1 wiz XML_SetUserData(g_parser, &storage); 254 1.1 wiz XML_SetStartElementHandler(g_parser, accumulate_attribute); 255 1.1 wiz if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 256 1.1 wiz == XML_STATUS_ERROR) 257 1.1 wiz _xml_failure(g_parser, file, line); 258 1.1 
wiz CharData_CheckXMLChars(&storage, expected); 259 1.1 wiz } 260 1.1 wiz 261 1.1 wiz void 262 1.1 wiz _run_ext_character_check(const char *text, ExtTest *test_data, 263 1.1 wiz const XML_Char *expected, const char *file, int line) { 264 1.1 wiz CharData *const storage = (CharData *)malloc(sizeof(CharData)); 265 1.1 wiz 266 1.1 wiz CharData_Init(storage); 267 1.1 wiz test_data->storage = storage; 268 1.1 wiz XML_SetUserData(g_parser, test_data); 269 1.1 wiz XML_SetCharacterDataHandler(g_parser, ext_accumulate_characters); 270 1.1 wiz if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 271 1.1 wiz == XML_STATUS_ERROR) 272 1.1 wiz _xml_failure(g_parser, file, line); 273 1.1 wiz CharData_CheckXMLChars(storage, expected); 274 1.1 wiz 275 1.1 wiz free(storage); 276 1.1 wiz } 277 1.1 wiz 278 1.1 wiz /* Control variable; the number of times duff_allocator() will successfully 279 1.1 wiz * allocate */ 280 1.1 wiz #define ALLOC_ALWAYS_SUCCEED (-1) 281 1.1 wiz #define REALLOC_ALWAYS_SUCCEED (-1) 282 1.1 wiz 283 1.1 wiz int g_allocation_count = ALLOC_ALWAYS_SUCCEED; 284 1.1 wiz int g_reallocation_count = REALLOC_ALWAYS_SUCCEED; 285 1.1 wiz 286 1.1 wiz /* Crocked allocator for allocation failure tests */ 287 1.1 wiz void * 288 1.1 wiz duff_allocator(size_t size) { 289 1.1 wiz if (g_allocation_count == 0) 290 1.1 wiz return NULL; 291 1.1 wiz if (g_allocation_count != ALLOC_ALWAYS_SUCCEED) 292 1.1 wiz g_allocation_count--; 293 1.1 wiz return malloc(size); 294 1.1 wiz } 295 1.1 wiz 296 1.1 wiz /* Crocked reallocator for allocation failure tests */ 297 1.1 wiz void * 298 1.1 wiz duff_reallocator(void *ptr, size_t size) { 299 1.1 wiz if (g_reallocation_count == 0) 300 1.1 wiz return NULL; 301 1.1 wiz if (g_reallocation_count != REALLOC_ALWAYS_SUCCEED) 302 1.1 wiz g_reallocation_count--; 303 1.1 wiz return realloc(ptr, size); 304 1.1 wiz } 305 1.1.1.2 wiz 306 1.1.1.2 wiz // Portable remake of strndup(3) for C99; does not care about space efficiency 307 1.1.1.2 
// Duplicates at most `n` bytes of `s`, stopping early at the first NUL byte,
// into a freshly malloc'ed buffer of `n + 1` bytes that is always
// NUL-terminated (unused trailing bytes are zeroed).  The caller owns the
// result and must free() it.
//
// Returns NULL with errno set to EINVAL when `s` is NULL or `n` is SIZE_MAX
// (where `n + 1` would wrap around), or to ENOMEM when the allocation fails.
// On success errno is set to 0.
char *
portable_strndup(const char *s, size_t n) {
  if ((s == NULL) || (n == SIZE_MAX)) {
    errno = EINVAL;
    return NULL;
  }

  char *const buffer = (char *)malloc(n + 1);
  if (buffer == NULL) {
    errno = ENOMEM;
    return NULL;
  }

  errno = 0;

  // Never read past the terminator of `s`: a string shorter than `n`
  // must not be over-read just because the caller passed a generous bound.
  size_t length = 0;
  while ((length < n) && (s[length] != '\0')) {
    length++;
  }

  memcpy(buffer, s, length);

  // Terminates the copy and zero-fills the rest, including buffer[n]
  memset(buffer + length, 0, (n + 1) - length);

  return buffer;
}