Home | History | Annotate | Line # | Download | only in xmlwf
      1 /*
      2                             __  __            _
      3                          ___\ \/ /_ __   __ _| |_
      4                         / _ \\  /| '_ \ / _` | __|
      5                        |  __//  \| |_) | (_| | |_
      6                         \___/_/\_\ .__/ \__,_|\__|
      7                                  |_| XML parser
      8 
      9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
     10    Copyright (c) 2000      Clark Cooper <coopercc (at) users.sourceforge.net>
     11    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net>
     12    Copyright (c) 2004-2006 Karl Waclawek <karl (at) waclawek.net>
     13    Copyright (c) 2005-2007 Steven Solie <steven (at) solie.ca>
     14    Copyright (c) 2016-2025 Sebastian Pipping <sebastian (at) pipping.org>
     15    Copyright (c) 2017      Rhodri James <rhodri (at) wildebeest.org.uk>
     16    Copyright (c) 2019      David Loffredo <loffredo (at) steptools.com>
     17    Copyright (c) 2021      Donghee Na <donghee.na (at) python.org>
     18    Copyright (c) 2024      Hanno Böck <hanno (at) gentoo.org>
     19    Copyright (c) 2025      Alfonso Gregory <gfunni234 (at) gmail.com>
     20    Copyright (c) 2026      Matthew Fernandez <matthew.fernandez (at) gmail.com>
     21    Licensed under the MIT license:
     22 
     23    Permission is  hereby granted,  free of charge,  to any  person obtaining
     24    a  copy  of  this  software   and  associated  documentation  files  (the
     25    "Software"),  to  deal in  the  Software  without restriction,  including
     26    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
     27    distribute, sublicense, and/or sell copies of the Software, and to permit
     28    persons  to whom  the Software  is  furnished to  do so,  subject to  the
     29    following conditions:
     30 
     31    The above copyright  notice and this permission notice  shall be included
     32    in all copies or substantial portions of the Software.
     33 
     34    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
     35    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
     36    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
     37    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
     38    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
     39    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     40    USE OR OTHER DEALINGS IN THE SOFTWARE.
     41 */
     42 
     43 #include "expat_config.h"
     44 
     45 #include <stdio.h>
     46 #include <stdlib.h>
     47 #include <stddef.h>
     48 #include <string.h>
     49 #include <fcntl.h>
     50 
     51 #ifdef _WIN32
     52 #  include "winconfig.h"
     53 #endif
     54 
     55 #include "expat.h"
     56 #include "internal.h" /* for UNUSED_P only */
     57 #include "xmlfile.h"
     58 #include "xmltchar.h"
     59 #include "filemap.h"
     60 
/* Function "read":
   EXPAT_read names the low-level read function, EXPAT_read_count_t the type
   of its return value, and EXPAT_read_req_t the type of its byte-count
   argument.  The three differ between the MSVC runtime and POSIX. */
#if defined(_MSC_VER)
#  include <io.h>
/* https://msdn.microsoft.com/en-us/library/wyssk1bs(v=vs.100).aspx */
#  define EXPAT_read _read
#  define EXPAT_read_count_t int
#  define EXPAT_read_req_t unsigned int
#else /* POSIX */
#  include <unistd.h>
/* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
#  define EXPAT_read read
#  define EXPAT_read_count_t ssize_t
#  define EXPAT_read_req_t size_t
#endif

/* Make O_BINARY usable unconditionally: fall back to _O_BINARY when only
   that spelling exists, and to 0 (a no-op when OR-ed into the open flags)
   when neither is defined. */
#ifndef O_BINARY
#  ifdef _O_BINARY
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
     83 
/* Number of bytes requested from XML_GetBuffer() and from the read call on
   each iteration of processStream(); defaults to 8 KiB.  It has external
   linkage, so presumably other translation units may adjust it. */
int g_read_size_bytes = 1024 * 8;

/* Context handed to processFile() through filemap()'s opaque argument. */
typedef struct {
  XML_Parser parser; /* parser that receives the mapped data */
  int *retPtr;       /* where processFile() stores 1 (success) or 0 (error) */
} PROCESS_ARGS;

/* Forward declaration: externalEntityRefFilemap() calls processStream()
   before its definition appears. */
static int processStream(const XML_Char *filename, XML_Parser parser);
     92 
     93 static void
     94 reportError(XML_Parser parser, const XML_Char *filename) {
     95   enum XML_Error code = XML_GetErrorCode(parser);
     96   const XML_Char *message = XML_ErrorString(code);
     97   if (message)
     98     ftprintf(stdout,
     99              T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
    100                  T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
    101              filename, XML_GetCurrentLineNumber(parser),
    102              XML_GetCurrentColumnNumber(parser), message);
    103   else
    104     ftprintf(stderr, T("%s: (unknown message %u)\n"), filename,
    105              (unsigned int)code);
    106 }
    107 
    108 /* This implementation will give problems on files larger than INT_MAX. */
    109 static void
    110 processFile(const void *data, size_t size, const XML_Char *filename,
    111             void *args) {
    112   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
    113   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
    114   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
    115     reportError(parser, filename);
    116     *retPtr = 0;
    117   } else
    118     *retPtr = 1;
    119 }
    120 
    121 #if defined(_WIN32)
    122 
    123 static int
    124 isAsciiLetter(XML_Char c) {
    125   return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
    126 }
    127 
    128 #endif /* _WIN32 */
    129 
    130 static const XML_Char *
    131 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
    132                 XML_Char **toFree) {
    133   XML_Char *s;
    134   *toFree = 0;
    135   if (! base || *systemId == T('/')
    136 #if defined(_WIN32)
    137       || *systemId == T('\\')
    138       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
    139 #endif
    140   )
    141     return systemId;
    142   *toFree = malloc((tcslen(base) + tcslen(systemId) + 2) * sizeof(XML_Char));
    143   if (! *toFree)
    144     return systemId;
    145   tcscpy(*toFree, base);
    146   s = *toFree;
    147   if (tcsrchr(s, T('/')))
    148     s = tcsrchr(s, T('/')) + 1;
    149 #if defined(_WIN32)
    150   if (tcsrchr(s, T('\\')))
    151     s = tcsrchr(s, T('\\')) + 1;
    152 #endif
    153   tcscpy(s, systemId);
    154   return *toFree;
    155 }
    156 
    157 static int
    158 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
    159                          const XML_Char *base, const XML_Char *systemId,
    160                          const XML_Char *publicId) {
    161   int result;
    162   XML_Char *s;
    163   const XML_Char *filename;
    164   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
    165   int filemapRes;
    166   PROCESS_ARGS args;
    167   UNUSED_P(publicId);
    168   args.retPtr = &result;
    169   args.parser = entParser;
    170   filename = resolveSystemId(base, systemId, &s);
    171   XML_SetBase(entParser, filename);
    172   filemapRes = filemap(filename, processFile, &args);
    173   switch (filemapRes) {
    174   case 0:
    175     result = 0;
    176     break;
    177   case 2:
    178     ftprintf(stderr,
    179              T("%s: file too large for memory-mapping")
    180                  T(", switching to streaming\n"),
    181              filename);
    182     result = processStream(filename, entParser);
    183     break;
    184   }
    185   free(s);
    186   XML_ParserFree(entParser);
    187   return result;
    188 }
    189 
    190 static int
    191 processStream(const XML_Char *filename, XML_Parser parser) {
    192   /* passing NULL for filename means read input from stdin */
    193   int fd = 0; /* 0 is the fileno for stdin */
    194 
    195   if (filename != NULL) {
    196     fd = topen(filename, O_BINARY | O_RDONLY);
    197     if (fd < 0) {
    198       tperror(filename);
    199       return 0;
    200     }
    201   }
    202   for (;;) {
    203     EXPAT_read_count_t nread;
    204     char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
    205     if (! buf) {
    206       if (filename != NULL)
    207         close(fd);
    208       ftprintf(stderr, T("%s: out of memory\n"),
    209                filename != NULL ? filename : T("xmlwf"));
    210       return 0;
    211     }
    212     nread = EXPAT_read(fd, buf, (EXPAT_read_req_t)g_read_size_bytes);
    213     if (nread < 0) {
    214       tperror(filename != NULL ? filename : T("STDIN"));
    215       if (filename != NULL)
    216         close(fd);
    217       return 0;
    218     }
    219     if (XML_ParseBuffer(parser, (int)nread, nread == 0) == XML_STATUS_ERROR) {
    220       reportError(parser, filename != NULL ? filename : T("STDIN"));
    221       if (filename != NULL)
    222         close(fd);
    223       return 0;
    224     }
    225     if (nread == 0) {
    226       if (filename != NULL)
    227         close(fd);
    228       break;
    229     }
    230   }
    231   return 1;
    232 }
    233 
    234 static int
    235 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
    236                         const XML_Char *base, const XML_Char *systemId,
    237                         const XML_Char *publicId) {
    238   XML_Char *s;
    239   const XML_Char *filename;
    240   int ret;
    241   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
    242   UNUSED_P(publicId);
    243   filename = resolveSystemId(base, systemId, &s);
    244   XML_SetBase(entParser, filename);
    245   ret = processStream(filename, entParser);
    246   free(s);
    247   XML_ParserFree(entParser);
    248   return ret;
    249 }
    250 
    251 int
    252 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
    253   int result;
    254 
    255   if (! XML_SetBase(parser, filename)) {
    256     ftprintf(stderr, T("%s: out of memory"), filename);
    257     exit(1);
    258   }
    259 
    260   if (flags & XML_EXTERNAL_ENTITIES)
    261     XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
    262                                                 ? externalEntityRefFilemap
    263                                                 : externalEntityRefStream);
    264   if (flags & XML_MAP_FILE) {
    265     int filemapRes;
    266     PROCESS_ARGS args;
    267     args.retPtr = &result;
    268     args.parser = parser;
    269     filemapRes = filemap(filename, processFile, &args);
    270     switch (filemapRes) {
    271     case 0:
    272       result = 0;
    273       break;
    274     case 2:
    275       ftprintf(stderr,
    276                T("%s: file too large for memory-mapping")
    277                    T(", switching to streaming\n"),
    278                filename);
    279       result = processStream(filename, parser);
    280       break;
    281     }
    282   } else
    283     result = processStream(filename, parser);
    284   return result;
    285 }
    286