Home | History | Annotate | Line # | Download | only in xmlwf
      1 /*
      2                             __  __            _
      3                          ___\ \/ /_ __   __ _| |_
      4                         / _ \\  /| '_ \ / _` | __|
      5                        |  __//  \| |_) | (_| | |_
      6                         \___/_/\_\ .__/ \__,_|\__|
      7                                  |_| XML parser
      8 
      9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
     10    Copyright (c) 2000      Clark Cooper <coopercc (at) users.sourceforge.net>
     11    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake (at) users.sourceforge.net>
     12    Copyright (c) 2004-2006 Karl Waclawek <karl (at) waclawek.net>
     13    Copyright (c) 2005-2007 Steven Solie <steven (at) solie.ca>
     14    Copyright (c) 2016-2023 Sebastian Pipping <sebastian (at) pipping.org>
     15    Copyright (c) 2017      Rhodri James <rhodri (at) wildebeest.org.uk>
     16    Copyright (c) 2019      David Loffredo <loffredo (at) steptools.com>
     17    Copyright (c) 2021      Donghee Na <donghee.na (at) python.org>
     18    Copyright (c) 2024      Hanno Böck <hanno (at) gentoo.org>
     19    Licensed under the MIT license:
     20 
     21    Permission is  hereby granted,  free of charge,  to any  person obtaining
     22    a  copy  of  this  software   and  associated  documentation  files  (the
     23    "Software"),  to  deal in  the  Software  without restriction,  including
     24    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
     25    distribute, sublicense, and/or sell copies of the Software, and to permit
     26    persons  to whom  the Software  is  furnished to  do so,  subject to  the
     27    following conditions:
     28 
     29    The above copyright  notice and this permission notice  shall be included
     30    in all copies or substantial portions of the Software.
     31 
     32    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
     33    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
     34    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
     35    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
     36    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
     37    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     38    USE OR OTHER DEALINGS IN THE SOFTWARE.
     39 */
     40 
     41 #include "expat_config.h"
     42 
     43 #include <stdio.h>
     44 #include <stdlib.h>
     45 #include <stddef.h>
     46 #include <string.h>
     47 #include <fcntl.h>
     48 
     49 #ifdef _WIN32
     50 #  include "winconfig.h"
     51 #endif
     52 
     53 #include "expat.h"
     54 #include "internal.h" /* for UNUSED_P only */
     55 #include "xmlfile.h"
     56 #include "xmltchar.h"
     57 #include "filemap.h"
     58 
     59 #if defined(_MSC_VER)
     60 #  include <io.h>
     61 #endif
     62 
     63 #ifdef HAVE_UNISTD_H
     64 #  include <unistd.h>
     65 #endif
     66 
     67 #ifndef O_BINARY
     68 #  ifdef _O_BINARY
     69 #    define O_BINARY _O_BINARY
     70 #  else
     71 #    define O_BINARY 0
     72 #  endif
     73 #endif
     74 
/* Number of bytes requested from the parser (XML_GetBuffer) and read from
   the input per iteration of processStream().  Not static, so it is visible
   to other translation units. */
int g_read_size_bytes = 1024 * 8;

/* Argument bundle passed to processFile() through the void *args parameter
   of filemap(). */
typedef struct {
  XML_Parser parser; /* parser that receives the file contents */
  int *retPtr;       /* processFile() stores 1 here on success, 0 on error */
} PROCESS_ARGS;

/* Forward declaration: processStream() is defined further down but is
   already needed by externalEntityRefFilemap(). */
static int processStream(const XML_Char *filename, XML_Parser parser);
     83 
     84 static void
     85 reportError(XML_Parser parser, const XML_Char *filename) {
     86   enum XML_Error code = XML_GetErrorCode(parser);
     87   const XML_Char *message = XML_ErrorString(code);
     88   if (message)
     89     ftprintf(stdout,
     90              T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
     91                  T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
     92              filename, XML_GetErrorLineNumber(parser),
     93              XML_GetErrorColumnNumber(parser), message);
     94   else
     95     ftprintf(stderr, T("%s: (unknown message %u)\n"), filename,
     96              (unsigned int)code);
     97 }
     98 
     99 /* This implementation will give problems on files larger than INT_MAX. */
    100 static void
    101 processFile(const void *data, size_t size, const XML_Char *filename,
    102             void *args) {
    103   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
    104   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
    105   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
    106     reportError(parser, filename);
    107     *retPtr = 0;
    108   } else
    109     *retPtr = 1;
    110 }
    111 
    112 #if defined(_WIN32)
    113 
    114 static int
    115 isAsciiLetter(XML_Char c) {
    116   return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
    117 }
    118 
    119 #endif /* _WIN32 */
    120 
    121 static const XML_Char *
    122 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
    123                 XML_Char **toFree) {
    124   XML_Char *s;
    125   *toFree = 0;
    126   if (! base || *systemId == T('/')
    127 #if defined(_WIN32)
    128       || *systemId == T('\\')
    129       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
    130 #endif
    131   )
    132     return systemId;
    133   *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
    134                                * sizeof(XML_Char));
    135   if (! *toFree)
    136     return systemId;
    137   tcscpy(*toFree, base);
    138   s = *toFree;
    139   if (tcsrchr(s, T('/')))
    140     s = tcsrchr(s, T('/')) + 1;
    141 #if defined(_WIN32)
    142   if (tcsrchr(s, T('\\')))
    143     s = tcsrchr(s, T('\\')) + 1;
    144 #endif
    145   tcscpy(s, systemId);
    146   return *toFree;
    147 }
    148 
    149 static int
    150 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
    151                          const XML_Char *base, const XML_Char *systemId,
    152                          const XML_Char *publicId) {
    153   int result;
    154   XML_Char *s;
    155   const XML_Char *filename;
    156   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
    157   int filemapRes;
    158   PROCESS_ARGS args;
    159   UNUSED_P(publicId);
    160   args.retPtr = &result;
    161   args.parser = entParser;
    162   filename = resolveSystemId(base, systemId, &s);
    163   XML_SetBase(entParser, filename);
    164   filemapRes = filemap(filename, processFile, &args);
    165   switch (filemapRes) {
    166   case 0:
    167     result = 0;
    168     break;
    169   case 2:
    170     ftprintf(stderr,
    171              T("%s: file too large for memory-mapping")
    172                  T(", switching to streaming\n"),
    173              filename);
    174     result = processStream(filename, entParser);
    175     break;
    176   }
    177   free(s);
    178   XML_ParserFree(entParser);
    179   return result;
    180 }
    181 
    182 static int
    183 processStream(const XML_Char *filename, XML_Parser parser) {
    184   /* passing NULL for filename means read input from stdin */
    185   int fd = 0; /* 0 is the fileno for stdin */
    186 
    187   if (filename != NULL) {
    188     fd = topen(filename, O_BINARY | O_RDONLY);
    189     if (fd < 0) {
    190       tperror(filename);
    191       return 0;
    192     }
    193   }
    194   for (;;) {
    195     int nread;
    196     char *buf = (char *)XML_GetBuffer(parser, g_read_size_bytes);
    197     if (! buf) {
    198       if (filename != NULL)
    199         close(fd);
    200       ftprintf(stderr, T("%s: out of memory\n"),
    201                filename != NULL ? filename : T("xmlwf"));
    202       return 0;
    203     }
    204     nread = read(fd, buf, g_read_size_bytes);
    205     if (nread < 0) {
    206       tperror(filename != NULL ? filename : T("STDIN"));
    207       if (filename != NULL)
    208         close(fd);
    209       return 0;
    210     }
    211     if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
    212       reportError(parser, filename != NULL ? filename : T("STDIN"));
    213       if (filename != NULL)
    214         close(fd);
    215       return 0;
    216     }
    217     if (nread == 0) {
    218       if (filename != NULL)
    219         close(fd);
    220       break;
    221       ;
    222     }
    223   }
    224   return 1;
    225 }
    226 
    227 static int
    228 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
    229                         const XML_Char *base, const XML_Char *systemId,
    230                         const XML_Char *publicId) {
    231   XML_Char *s;
    232   const XML_Char *filename;
    233   int ret;
    234   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
    235   UNUSED_P(publicId);
    236   filename = resolveSystemId(base, systemId, &s);
    237   XML_SetBase(entParser, filename);
    238   ret = processStream(filename, entParser);
    239   free(s);
    240   XML_ParserFree(entParser);
    241   return ret;
    242 }
    243 
    244 int
    245 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
    246   int result;
    247 
    248   if (! XML_SetBase(parser, filename)) {
    249     ftprintf(stderr, T("%s: out of memory"), filename);
    250     exit(1);
    251   }
    252 
    253   if (flags & XML_EXTERNAL_ENTITIES)
    254     XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
    255                                                 ? externalEntityRefFilemap
    256                                                 : externalEntityRefStream);
    257   if (flags & XML_MAP_FILE) {
    258     int filemapRes;
    259     PROCESS_ARGS args;
    260     args.retPtr = &result;
    261     args.parser = parser;
    262     filemapRes = filemap(filename, processFile, &args);
    263     switch (filemapRes) {
    264     case 0:
    265       result = 0;
    266       break;
    267     case 2:
    268       ftprintf(stderr,
    269                T("%s: file too large for memory-mapping")
    270                    T(", switching to streaming\n"),
    271                filename);
    272       result = processStream(filename, parser);
    273       break;
    274     }
    275   } else
    276     result = processStream(filename, parser);
    277   return result;
    278 }
    279