1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000 Clark Cooper <coopercc (at) users.sourceforge.net> 11 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net> 12 Copyright (c) 2004-2006 Karl Waclawek <karl (at) waclawek.net> 13 Copyright (c) 2005-2007 Steven Solie <steven (at) solie.ca> 14 Copyright (c) 2016-2025 Sebastian Pipping <sebastian (at) pipping.org> 15 Copyright (c) 2017 Rhodri James <rhodri (at) wildebeest.org.uk> 16 Copyright (c) 2019 David Loffredo <loffredo (at) steptools.com> 17 Copyright (c) 2021 Donghee Na <donghee.na (at) python.org> 18 Copyright (c) 2024 Hanno Bck <hanno (at) gentoo.org> 19 Copyright (c) 2025 Alfonso Gregory <gfunni234 (at) gmail.com> 20 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez (at) gmail.com> 21 Licensed under the MIT license: 22 23 Permission is hereby granted, free of charge, to any person obtaining 24 a copy of this software and associated documentation files (the 25 "Software"), to deal in the Software without restriction, including 26 without limitation the rights to use, copy, modify, merge, publish, 27 distribute, sublicense, and/or sell copies of the Software, and to permit 28 persons to whom the Software is furnished to do so, subject to the 29 following conditions: 30 31 The above copyright notice and this permission notice shall be included 32 in all copies or substantial portions of the Software. 33 34 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 35 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 36 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 37 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 38 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 39 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 40 USE OR OTHER DEALINGS IN THE SOFTWARE. 41 */ 42 43 #include "expat_config.h" 44 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <stddef.h> 48 #include <string.h> 49 #include <fcntl.h> 50 51 #ifdef _WIN32 52 # include "winconfig.h" 53 #endif 54 55 #include "expat.h" 56 #include "internal.h" /* for UNUSED_P only */ 57 #include "xmlfile.h" 58 #include "xmltchar.h" 59 #include "filemap.h" 60 61 /* Function "read": */ 62 #if defined(_MSC_VER) 63 # include <io.h> 64 /* https://msdn.microsoft.com/en-us/library/wyssk1bs(v=vs.100).aspx */ 65 # define EXPAT_read _read 66 # define EXPAT_read_count_t int 67 # define EXPAT_read_req_t unsigned int 68 #else /* POSIX */ 69 # include <unistd.h> 70 /* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */ 71 # define EXPAT_read read 72 # define EXPAT_read_count_t ssize_t 73 # define EXPAT_read_req_t size_t 74 #endif 75 76 #ifndef O_BINARY 77 # ifdef _O_BINARY 78 # define O_BINARY _O_BINARY 79 # else 80 # define O_BINARY 0 81 # endif 82 #endif 83 84 int g_read_size_bytes = 1024 * 8; 85 86 typedef struct { 87 XML_Parser parser; 88 int *retPtr; 89 } PROCESS_ARGS; 90 91 static int processStream(const XML_Char *filename, XML_Parser parser); 92 93 static void 94 reportError(XML_Parser parser, const XML_Char *filename) { 95 enum XML_Error code = XML_GetErrorCode(parser); 96 const XML_Char *message = XML_ErrorString(code); 97 if (message) 98 ftprintf(stdout, 99 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%") 100 T(XML_FMT_INT_MOD) T("u") T(": %s\n"), 101 filename, XML_GetCurrentLineNumber(parser), 102 XML_GetCurrentColumnNumber(parser), message); 103 else 104 ftprintf(stderr, T("%s: (unknown message %u)\n"), filename, 105 (unsigned int)code); 106 } 107 108 /* This implementation will give problems on files larger than INT_MAX. */ 109 static void 110 processFile(const void *data, size_t size, const XML_Char *filename, 111 void *args) { 112 XML_Parser parser = ((PROCESS_ARGS *)args)->parser; 113 int *retPtr = ((PROCESS_ARGS *)args)->retPtr; 114 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) { 115 reportError(parser, filename); 116 *retPtr = 0; 117 } else 118 *retPtr = 1; 119 } 120 121 #if defined(_WIN32) 122 123 static int 124 isAsciiLetter(XML_Char c) { 125 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); 126 } 127 128 #endif /* _WIN32 */ 129 130 static const XML_Char * 131 resolveSystemId(const XML_Char *base, const XML_Char *systemId, 132 XML_Char **toFree) { 133 XML_Char *s; 134 *toFree = 0; 135 if (! base || *systemId == T('/') 136 #if defined(_WIN32) 137 || *systemId == T('\\') 138 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) 139 #endif 140 ) 141 return systemId; 142 *toFree = malloc((tcslen(base) + tcslen(systemId) + 2) * sizeof(XML_Char)); 143 if (! *toFree) 144 return systemId; 145 tcscpy(*toFree, base); 146 s = *toFree; 147 if (tcsrchr(s, T('/'))) 148 s = tcsrchr(s, T('/')) + 1; 149 #if defined(_WIN32) 150 if (tcsrchr(s, T('\\'))) 151 s = tcsrchr(s, T('\\')) + 1; 152 #endif 153 tcscpy(s, systemId); 154 return *toFree; 155 } 156 157 static int 158 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context, 159 const XML_Char *base, const XML_Char *systemId, 160 const XML_Char *publicId) { 161 int result; 162 XML_Char *s; 163 const XML_Char *filename; 164 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 165 int filemapRes; 166 PROCESS_ARGS args; 167 UNUSED_P(publicId); 168 args.retPtr = &result; 169 args.parser = entParser; 170 filename = resolveSystemId(base, systemId, &s); 171 XML_SetBase(entParser, filename); 172 filemapRes = filemap(filename, processFile, &args); 173 switch (filemapRes) { 174 case 0: 175 result = 0; 176 break; 177 case 2: 178 ftprintf(stderr, 179 T("%s: file too large for memory-mapping") 180 T(", switching to streaming\n"), 181 filename); 182 result = processStream(filename, entParser); 183 break; 184 } 185 free(s); 186 XML_ParserFree(entParser); 187 return result; 188 } 189 190 static int 191 processStream(const XML_Char *filename, XML_Parser parser) { 192 /* passing NULL for filename means read input from stdin */ 193 int fd = 0; /* 0 is the fileno for stdin */ 194 195 if (filename != NULL) { 196 fd = topen(filename, O_BINARY | O_RDONLY); 197 if (fd < 0) { 198 tperror(filename); 199 return 0; 200 } 201 } 202 for (;;) { 203 EXPAT_read_count_t nread; 204 char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes); 205 if (! buf) { 206 if (filename != NULL) 207 close(fd); 208 ftprintf(stderr, T("%s: out of memory\n"), 209 filename != NULL ? filename : T("xmlwf")); 210 return 0; 211 } 212 nread = EXPAT_read(fd, buf, (EXPAT_read_req_t)g_read_size_bytes); 213 if (nread < 0) { 214 tperror(filename != NULL ? filename : T("STDIN")); 215 if (filename != NULL) 216 close(fd); 217 return 0; 218 } 219 if (XML_ParseBuffer(parser, (int)nread, nread == 0) == XML_STATUS_ERROR) { 220 reportError(parser, filename != NULL ? filename : T("STDIN")); 221 if (filename != NULL) 222 close(fd); 223 return 0; 224 } 225 if (nread == 0) { 226 if (filename != NULL) 227 close(fd); 228 break; 229 } 230 } 231 return 1; 232 } 233 234 static int 235 externalEntityRefStream(XML_Parser parser, const XML_Char *context, 236 const XML_Char *base, const XML_Char *systemId, 237 const XML_Char *publicId) { 238 XML_Char *s; 239 const XML_Char *filename; 240 int ret; 241 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); 242 UNUSED_P(publicId); 243 filename = resolveSystemId(base, systemId, &s); 244 XML_SetBase(entParser, filename); 245 ret = processStream(filename, entParser); 246 free(s); 247 XML_ParserFree(entParser); 248 return ret; 249 } 250 251 int 252 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) { 253 int result; 254 255 if (! XML_SetBase(parser, filename)) { 256 ftprintf(stderr, T("%s: out of memory"), filename); 257 exit(1); 258 } 259 260 if (flags & XML_EXTERNAL_ENTITIES) 261 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE) 262 ? externalEntityRefFilemap 263 : externalEntityRefStream); 264 if (flags & XML_MAP_FILE) { 265 int filemapRes; 266 PROCESS_ARGS args; 267 args.retPtr = &result; 268 args.parser = parser; 269 filemapRes = filemap(filename, processFile, &args); 270 switch (filemapRes) { 271 case 0: 272 result = 0; 273 break; 274 case 2: 275 ftprintf(stderr, 276 T("%s: file too large for memory-mapping") 277 T(", switching to streaming\n"), 278 filename); 279 result = processStream(filename, parser); 280 break; 281 } 282 } else 283 result = processStream(filename, parser); 284 return result; 285 } 286