Home | History | Annotate | Line # | Download | only in scanf
doscan.c revision 1.1.1.2
      1      1.1  mrg /* __gmp_doscan -- formatted input internals.
      2      1.1  mrg 
      3      1.1  mrg    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
      4      1.1  mrg    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
      5      1.1  mrg    FUTURE GNU MP RELEASES.
      6      1.1  mrg 
      7  1.1.1.2  mrg Copyright 2001-2003 Free Software Foundation, Inc.
      8      1.1  mrg 
      9      1.1  mrg This file is part of the GNU MP Library.
     10      1.1  mrg 
     11      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     12  1.1.1.2  mrg it under the terms of either:
     13  1.1.1.2  mrg 
     14  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     15  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     16  1.1.1.2  mrg     option) any later version.
     17  1.1.1.2  mrg 
     18  1.1.1.2  mrg or
     19  1.1.1.2  mrg 
     20  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     21  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     22  1.1.1.2  mrg     later version.
     23  1.1.1.2  mrg 
     24  1.1.1.2  mrg or both in parallel, as here.
     25      1.1  mrg 
     26      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     27      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     28  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     29  1.1.1.2  mrg for more details.
     30      1.1  mrg 
     31  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     32  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     33  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     34      1.1  mrg 
     35      1.1  mrg #define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
     36      1.1  mrg 
     37  1.1.1.2  mrg #include "config.h"	/* needed for the HAVE_, could also move gmp incls */
     38      1.1  mrg 
     39      1.1  mrg #include <stdarg.h>
     40      1.1  mrg #include <ctype.h>
     41      1.1  mrg #include <stddef.h>    /* for ptrdiff_t */
     42      1.1  mrg #include <stdio.h>
     43      1.1  mrg #include <stdlib.h>    /* for strtol */
     44      1.1  mrg #include <string.h>
     45      1.1  mrg 
     46      1.1  mrg #if HAVE_LANGINFO_H
     47      1.1  mrg #include <langinfo.h>  /* for nl_langinfo */
     48      1.1  mrg #endif
     49      1.1  mrg 
     50      1.1  mrg #if HAVE_LOCALE_H
     51      1.1  mrg #include <locale.h>    /* for localeconv */
     52      1.1  mrg #endif
     53      1.1  mrg 
     54      1.1  mrg #if HAVE_INTTYPES_H
     55      1.1  mrg # include <inttypes.h> /* for intmax_t */
     56      1.1  mrg #else
     57      1.1  mrg # if HAVE_STDINT_H
     58      1.1  mrg #  include <stdint.h>
     59      1.1  mrg # endif
     60      1.1  mrg #endif
     61      1.1  mrg 
     62      1.1  mrg #if HAVE_SYS_TYPES_H
     63      1.1  mrg #include <sys/types.h> /* for quad_t */
     64      1.1  mrg #endif
     65      1.1  mrg 
     66      1.1  mrg #include "gmp.h"
     67      1.1  mrg #include "gmp-impl.h"
     68      1.1  mrg 
     69      1.1  mrg 
     70      1.1  mrg /* Change this to "#define TRACE(x) x" for some traces. */
     71      1.1  mrg #define TRACE(x)
     72      1.1  mrg 
     73      1.1  mrg 
     74      1.1  mrg /* General:
     75      1.1  mrg 
     76      1.1  mrg        It's necessary to parse up the format string to recognise the GMP
     77      1.1  mrg        extra types F, Q and Z.  Other types and conversions are passed
     78      1.1  mrg        across to the standard sscanf or fscanf via funs->scan, for ease of
     79      1.1  mrg        implementation.  This is essential in the case of something like glibc
     80      1.1  mrg        %p where the pointer format isn't actually documented.
     81      1.1  mrg 
     82      1.1  mrg        Because funs->scan doesn't get the whole input it can't put the right
     83      1.1  mrg        values in for %n, so that's handled in __gmp_doscan.  Neither sscanf
     84      1.1  mrg        nor fscanf directly indicate how many characters were read, so an
     85      1.1  mrg        extra %n is appended to each run for that.  For fscanf this merely
     86      1.1  mrg        supports our %n output, but for sscanf it lets funs->step move us
     87      1.1  mrg        along the input string.
     88      1.1  mrg 
     89      1.1  mrg        Whitespace and literal matches in the format string, including %%,
     90      1.1  mrg        are handled directly within __gmp_doscan.  This is reasonably
     91      1.1  mrg        efficient, and avoids some suspicious behaviour observed in various
     92      1.1  mrg        system libc's.  GLIBC 2.2.4 for instance returns 0 on
     93      1.1  mrg 
     94      1.1  mrg 	   sscanf(" ", " x")
     95      1.1  mrg        or
     96      1.1  mrg 	   sscanf(" ", " x%d",&n)
     97      1.1  mrg 
     98      1.1  mrg        whereas we think they should return EOF, since end-of-string is
     99      1.1  mrg        reached when a match of "x" is required.
    100      1.1  mrg 
    101      1.1  mrg        For standard % conversions, funs->scan is called once for each
    102      1.1  mrg        conversion.  If we had vfscanf and vsscanf and could rely on their
    103      1.1  mrg        fixed text matching behaviour then we could call them with multiple
    104      1.1  mrg        consecutive standard conversions.  But plain fscanf and sscanf work
    105      1.1  mrg        fine, and parsing one field at a time shouldn't be too much of a
    106      1.1  mrg        slowdown.
    107      1.1  mrg 
    108      1.1  mrg    gmpscan:
    109      1.1  mrg 
    110      1.1  mrg        gmpscan reads a gmp type.  It's only used from one place, but is a
    111      1.1  mrg        separate subroutine to avoid a big chunk of complicated code in the
    112      1.1  mrg        middle of __gmp_doscan.  Within gmpscan a couple of loopbacks make it
    113      1.1  mrg        possible to share code for parsing integers, rationals and floats.
    114      1.1  mrg 
    115      1.1  mrg        In gmpscan normally one char of lookahead is maintained, but when width
    116      1.1  mrg        is reached that stops, on the principle that an fgetc/ungetc of a char
    117      1.1  mrg        past where we're told to stop would be undesirable.  "chars" is how many
    118      1.1  mrg        characters have been read so far, including the current c.  When
    119      1.1  mrg        chars==width and another character is desired then a jump is done to the
    120      1.1  mrg        "convert" stage.  c is invalid and mustn't be unget'ed in this case;
    121      1.1  mrg        chars is set to width+1 to indicate that.
    122      1.1  mrg 
    123      1.1  mrg        gmpscan normally returns the number of characters read.  -1 means an
    124      1.1  mrg        invalid field, -2 means EOF reached before any matching characters
    125      1.1  mrg        were read.
    126      1.1  mrg 
    127      1.1  mrg        For hex floats, the mantissa part is passed to mpf_set_str, then the
    128      1.1  mrg        exponent is applied with mpf_mul_2exp or mpf_div_2exp.  This is easier
    129      1.1  mrg        than teaching mpf_set_str about an exponent factor (ie. 2) differing
    130      1.1  mrg        from the mantissa radix point factor (ie. 16).  mpf_mul_2exp and
    131      1.1  mrg        mpf_div_2exp will preserve the application requested precision, so
    132      1.1  mrg        nothing in that respect is lost by making this a two-step process.
    133      1.1  mrg 
    134      1.1  mrg    Matching and errors:
    135      1.1  mrg 
    136      1.1  mrg        C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
    137      1.1  mrg        string which is a match for the appropriate type, or a prefix of a
    138      1.1  mrg        match.  With that done, if it's only a prefix then the result is a
    139      1.1  mrg        matching failure, ie. invalid input.
    140      1.1  mrg 
    141      1.1  mrg        This rule seems fairly clear, but doesn't seem to be universally
    142      1.1  mrg        applied in system C libraries.  Even GLIBC doesn't seem to get it
    143      1.1  mrg        right, insofar as it seems to accept some apparently invalid forms.
    144      1.1  mrg        Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
    145      1.1  mrg        standard would suggest a non-empty sequence of digits should be
    146      1.1  mrg        required after an "0x".
    147      1.1  mrg 
    148      1.1  mrg        A footnote to 7.19.6.2 para 17 notes how this input item reading can
    149      1.1  mrg        mean inputs acceptable to strtol are not acceptable to fscanf.  We
    150      1.1  mrg        think this confirms our reading of "0x" as invalid.
    151      1.1  mrg 
    152      1.1  mrg        Clearly gmp_sscanf could backtrack to a longest input which was a
    153      1.1  mrg        valid match for a given item, but this is not done, since C99 says
    154      1.1  mrg        sscanf is identical to fscanf, so we make gmp_sscanf identical to
    155      1.1  mrg        gmp_fscanf.
    156      1.1  mrg 
    157      1.1  mrg    Types:
    158      1.1  mrg 
    159      1.1  mrg        C99 says "ll" is for long long, and "L" is for long double floats.
    160      1.1  mrg        Unfortunately in GMP 4.1.1 we documented the two as equivalent.  This
    161      1.1  mrg        doesn't affect us directly, since both are passed through to plain
    162      1.1  mrg        scanf.  It seems wisest not to try to enforce the C99 rule.  This is
    163      1.1  mrg        consistent with what we said before, though whether it actually
    164      1.1  mrg        worked was always up to the C library.
    165      1.1  mrg 
    166      1.1  mrg    Alternatives:
    167      1.1  mrg 
    168      1.1  mrg        Consideration was given to using separate code for gmp_fscanf and
    169      1.1  mrg        gmp_sscanf.  The sscanf case could zip across a string doing literal
    170      1.1  mrg        matches or recognising digits in gmpscan, rather than making a
    171      1.1  mrg        function call funs->get per character.  The fscanf could use getc
    172      1.1  mrg        rather than fgetc too, which might help those systems where getc is a
    173      1.1  mrg        macro or otherwise inlined.  But none of this scanning and converting
    174      1.1  mrg        will be particularly fast, so the two are done together to keep it a
    175      1.1  mrg        little simpler for now.
    176      1.1  mrg 
    177      1.1  mrg        Various multibyte string issues are not addressed, for a start C99
    178      1.1  mrg        scanf says the format string is multibyte.  Since we pass %c, %s and
    179      1.1  mrg        %[ to the system scanf, they might do multibyte reads already, but
    180      1.1  mrg        it's another matter whether or not that can be used, since our digit
    181      1.1  mrg        and whitespace parsing is only unibyte.  The plan is to quietly
    182      1.1  mrg        ignore multibyte locales for now.  This is not as bad as it sounds,
    183      1.1  mrg        since GMP is presumably used mostly on numbers, which can be
    184      1.1  mrg        perfectly adequately treated in plain ASCII.
    185      1.1  mrg 
    186      1.1  mrg */
    187      1.1  mrg 
    188      1.1  mrg 
    189      1.1  mrg struct gmp_doscan_params_t {
    190      1.1  mrg   int	base;
    191      1.1  mrg   int	ignore;
    192      1.1  mrg   char	type;
    193      1.1  mrg   int	width;
    194      1.1  mrg };
    195      1.1  mrg 
    196      1.1  mrg 
    197      1.1  mrg #define GET(c)			\
    198      1.1  mrg   do {				\
    199      1.1  mrg     ASSERT (chars <= width);	\
    200      1.1  mrg     chars++;			\
    201      1.1  mrg     if (chars > width)		\
    202      1.1  mrg       goto convert;		\
    203      1.1  mrg     (c) = (*funs->get) (data);	\
    204      1.1  mrg   } while (0)
    205      1.1  mrg 
    206      1.1  mrg /* store into "s", extending if necessary */
    207      1.1  mrg #define STORE(c)							\
    208      1.1  mrg   do {									\
    209      1.1  mrg     ASSERT (s_upto <= s_alloc);						\
    210      1.1  mrg     if (s_upto >= s_alloc)						\
    211      1.1  mrg       {									\
    212      1.1  mrg 	size_t	s_alloc_new = s_alloc + S_ALLOC_STEP;			\
    213      1.1  mrg 	s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
    214      1.1  mrg 	s_alloc = s_alloc_new;						\
    215      1.1  mrg       }									\
    216      1.1  mrg     s[s_upto++] = c;							\
    217      1.1  mrg   } while (0)
    218      1.1  mrg 
    219      1.1  mrg #define S_ALLOC_STEP  512
    220      1.1  mrg 
    221      1.1  mrg static int
    222      1.1  mrg gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
    223      1.1  mrg 	 const struct gmp_doscan_params_t *p, void *dst)
    224      1.1  mrg {
    225      1.1  mrg   int	  chars, c, base, first, width, seen_point, seen_digit, hexfloat;
    226      1.1  mrg   size_t  s_upto, s_alloc, hexexp;
    227      1.1  mrg   char	  *s;
    228      1.1  mrg   int	  invalid = 0;
    229      1.1  mrg 
    230      1.1  mrg   TRACE (printf ("gmpscan\n"));
    231      1.1  mrg 
    232      1.1  mrg   ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
    233      1.1  mrg 
    234      1.1  mrg   c = (*funs->get) (data);
    235      1.1  mrg   if (c == EOF)
    236      1.1  mrg     return -2;
    237      1.1  mrg 
    238      1.1  mrg   chars = 1;
    239      1.1  mrg   first = 1;
    240      1.1  mrg   seen_point = 0;
    241      1.1  mrg   width = (p->width == 0 ? INT_MAX-1 : p->width);
    242      1.1  mrg   base = p->base;
    243      1.1  mrg   s_alloc = S_ALLOC_STEP;
    244      1.1  mrg   s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
    245      1.1  mrg   s_upto = 0;
    246      1.1  mrg   hexfloat = 0;
    247      1.1  mrg   hexexp = 0;
    248      1.1  mrg 
    249      1.1  mrg  another:
    250      1.1  mrg   seen_digit = 0;
    251      1.1  mrg   if (c == '-')
    252      1.1  mrg     {
    253      1.1  mrg       STORE (c);
    254      1.1  mrg       goto get_for_sign;
    255      1.1  mrg     }
    256      1.1  mrg   else if (c == '+')
    257      1.1  mrg     {
    258      1.1  mrg       /* don't store '+', it's not accepted by mpz_set_str etc */
    259      1.1  mrg     get_for_sign:
    260      1.1  mrg       GET (c);
    261      1.1  mrg     }
    262      1.1  mrg 
    263      1.1  mrg   if (base == 0)
    264      1.1  mrg     {
    265      1.1  mrg       base = 10;		  /* decimal if no base indicator */
    266      1.1  mrg       if (c == '0')
    267      1.1  mrg 	{
    268      1.1  mrg 	  seen_digit = 1;	  /* 0 alone is a valid number */
    269      1.1  mrg 	  if (p->type != 'F')
    270      1.1  mrg 	    base = 8;		  /* leading 0 is octal, for non-floats */
    271      1.1  mrg 	  STORE (c);
    272      1.1  mrg 	  GET (c);
    273      1.1  mrg 	  if (c == 'x' || c == 'X')
    274      1.1  mrg 	    {
    275      1.1  mrg 	      base = 16;
    276      1.1  mrg 	      seen_digit = 0;	  /* must have digits after an 0x */
    277      1.1  mrg 	      if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
    278      1.1  mrg 		hexfloat = 1;
    279      1.1  mrg 	      else
    280      1.1  mrg 		STORE (c);
    281      1.1  mrg 	      GET (c);
    282      1.1  mrg 	    }
    283      1.1  mrg 	}
    284      1.1  mrg     }
    285      1.1  mrg 
    286      1.1  mrg  digits:
    287      1.1  mrg   for (;;)
    288      1.1  mrg     {
    289      1.1  mrg       if (base == 16)
    290      1.1  mrg 	{
    291      1.1  mrg 	  if (! isxdigit (c))
    292      1.1  mrg 	    break;
    293      1.1  mrg 	}
    294      1.1  mrg       else
    295      1.1  mrg 	{
    296      1.1  mrg 	  if (! isdigit (c))
    297      1.1  mrg 	    break;
    298      1.1  mrg 	  if (base == 8 && (c == '8' || c == '9'))
    299      1.1  mrg 	    break;
    300      1.1  mrg 	}
    301      1.1  mrg 
    302      1.1  mrg       seen_digit = 1;
    303      1.1  mrg       STORE (c);
    304      1.1  mrg       GET (c);
    305      1.1  mrg     }
    306      1.1  mrg 
    307      1.1  mrg   if (first)
    308      1.1  mrg     {
    309      1.1  mrg       /* decimal point */
    310      1.1  mrg       if (p->type == 'F' && ! seen_point)
    311      1.1  mrg 	{
    312      1.1  mrg 	  /* For a multi-character decimal point, if the first character is
    313      1.1  mrg 	     present then all of it must be, otherwise the input is
    314      1.1  mrg 	     considered invalid.  */
    315      1.1  mrg 	  const char  *point = GMP_DECIMAL_POINT;
    316      1.1  mrg 	  int	      pc = (unsigned char) *point++;
    317      1.1  mrg 	  if (c == pc)
    318      1.1  mrg 	    {
    319      1.1  mrg 	      for (;;)
    320      1.1  mrg 		{
    321      1.1  mrg 		  STORE (c);
    322      1.1  mrg 		  GET (c);
    323      1.1  mrg 		  pc = (unsigned char) *point++;
    324      1.1  mrg 		  if (pc == '\0')
    325      1.1  mrg 		    break;
    326      1.1  mrg 		  if (c != pc)
    327      1.1  mrg 		    goto set_invalid;
    328      1.1  mrg 		}
    329      1.1  mrg 	      seen_point = 1;
    330      1.1  mrg 	      goto digits;
    331      1.1  mrg 	    }
    332      1.1  mrg 	}
    333      1.1  mrg 
    334      1.1  mrg       /* exponent */
    335      1.1  mrg       if (p->type == 'F')
    336      1.1  mrg 	{
    337      1.1  mrg 	  if (hexfloat && (c == 'p' || c == 'P'))
    338      1.1  mrg 	    {
    339      1.1  mrg 	      hexexp = s_upto; /* exponent location */
    340      1.1  mrg 	      base = 10;       /* exponent in decimal */
    341      1.1  mrg 	      goto exponent;
    342      1.1  mrg 	    }
    343      1.1  mrg 	  else if (! hexfloat && (c == 'e' || c == 'E'))
    344      1.1  mrg 	    {
    345      1.1  mrg 	    exponent:
    346      1.1  mrg 	      /* must have at least one digit in the mantissa, just an exponent
    347      1.1  mrg 		 is not good enough */
    348      1.1  mrg 	      if (! seen_digit)
    349      1.1  mrg 		goto set_invalid;
    350      1.1  mrg 
    351      1.1  mrg 	    do_second:
    352      1.1  mrg 	      first = 0;
    353      1.1  mrg 	      STORE (c);
    354      1.1  mrg 	      GET (c);
    355      1.1  mrg 	      goto another;
    356      1.1  mrg 	    }
    357      1.1  mrg 	}
    358      1.1  mrg 
    359      1.1  mrg       /* denominator */
    360      1.1  mrg       if (p->type == 'Q' && c == '/')
    361      1.1  mrg 	{
    362      1.1  mrg 	  /* must have at least one digit in the numerator */
    363      1.1  mrg 	  if (! seen_digit)
    364      1.1  mrg 	    goto set_invalid;
    365      1.1  mrg 
    366      1.1  mrg 	  /* now look for at least one digit in the denominator */
    367      1.1  mrg 	  seen_digit = 0;
    368      1.1  mrg 
    369      1.1  mrg 	  /* allow the base to be redetermined for "%i" */
    370      1.1  mrg 	  base = p->base;
    371      1.1  mrg 	  goto do_second;
    372      1.1  mrg 	}
    373      1.1  mrg     }
    374      1.1  mrg 
    375      1.1  mrg  convert:
    376      1.1  mrg   if (! seen_digit)
    377      1.1  mrg     {
    378      1.1  mrg     set_invalid:
    379      1.1  mrg       invalid = 1;
    380      1.1  mrg       goto done;
    381      1.1  mrg     }
    382      1.1  mrg 
    383      1.1  mrg   if (! p->ignore)
    384      1.1  mrg     {
    385      1.1  mrg       STORE ('\0');
    386      1.1  mrg       TRACE (printf ("	convert \"%s\"\n", s));
    387      1.1  mrg 
    388      1.1  mrg       /* We ought to have parsed out a valid string above, so just test
    389      1.1  mrg 	 mpz_set_str etc with an ASSERT.  */
    390      1.1  mrg       switch (p->type) {
    391      1.1  mrg       case 'F':
    392      1.1  mrg 	{
    393      1.1  mrg 	  mpf_ptr  f = (mpf_ptr) dst;
    394      1.1  mrg 	  if (hexexp != 0)
    395      1.1  mrg 	    s[hexexp] = '\0';
    396      1.1  mrg 	  ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
    397      1.1  mrg 	  if (hexexp != 0)
    398      1.1  mrg 	    {
    399      1.1  mrg 	      char *dummy;
    400      1.1  mrg 	      long  exp;
    401      1.1  mrg 	      exp = strtol (s + hexexp + 1, &dummy, 10);
    402      1.1  mrg 	      if (exp >= 0)
    403      1.1  mrg 		mpf_mul_2exp (f, f, (unsigned long) exp);
    404      1.1  mrg 	      else
    405      1.1  mrg 		mpf_div_2exp (f, f, - (unsigned long) exp);
    406      1.1  mrg 	    }
    407      1.1  mrg 	}
    408      1.1  mrg 	break;
    409      1.1  mrg       case 'Q':
    410      1.1  mrg 	ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
    411      1.1  mrg 	break;
    412      1.1  mrg       case 'Z':
    413      1.1  mrg 	ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
    414      1.1  mrg 	break;
    415      1.1  mrg       default:
    416      1.1  mrg 	ASSERT (0);
    417      1.1  mrg 	/*FALLTHRU*/
    418      1.1  mrg 	break;
    419      1.1  mrg       }
    420      1.1  mrg     }
    421      1.1  mrg 
    422      1.1  mrg  done:
    423      1.1  mrg   ASSERT (chars <= width+1);
    424      1.1  mrg   if (chars != width+1)
    425      1.1  mrg     {
    426      1.1  mrg       (*funs->unget) (c, data);
    427      1.1  mrg       TRACE (printf ("	ungetc %d, to give %d chars\n", c, chars-1));
    428      1.1  mrg     }
    429      1.1  mrg   chars--;
    430      1.1  mrg 
    431      1.1  mrg   (*__gmp_free_func) (s, s_alloc);
    432      1.1  mrg 
    433      1.1  mrg   if (invalid)
    434      1.1  mrg     {
    435      1.1  mrg       TRACE (printf ("	invalid\n"));
    436      1.1  mrg       return -1;
    437      1.1  mrg     }
    438      1.1  mrg 
    439      1.1  mrg   TRACE (printf ("  return %d chars (cf width %d)\n", chars, width));
    440      1.1  mrg   return chars;
    441      1.1  mrg }
    442      1.1  mrg 
    443      1.1  mrg 
    444      1.1  mrg /* Read and discard whitespace, if any.  Return number of chars skipped.
    445      1.1  mrg    Whitespace skipping never provokes the EOF return from __gmp_doscan, so
    446      1.1  mrg    it's not necessary to watch for EOF from funs->get, */
    447      1.1  mrg static int
    448      1.1  mrg skip_white (const struct gmp_doscan_funs_t *funs, void *data)
    449      1.1  mrg {
    450      1.1  mrg   int  c;
    451      1.1  mrg   int  ret = 0;
    452      1.1  mrg 
    453      1.1  mrg   do
    454      1.1  mrg     {
    455      1.1  mrg       c = (funs->get) (data);
    456      1.1  mrg       ret++;
    457      1.1  mrg     }
    458      1.1  mrg   while (isspace (c));
    459      1.1  mrg 
    460      1.1  mrg   (funs->unget) (c, data);
    461      1.1  mrg   ret--;
    462      1.1  mrg 
    463      1.1  mrg   TRACE (printf ("  skip white %d\n", ret));
    464      1.1  mrg   return ret;
    465      1.1  mrg }
    466      1.1  mrg 
    467      1.1  mrg 
    468      1.1  mrg int
    469      1.1  mrg __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
    470      1.1  mrg 	      const char *orig_fmt, va_list orig_ap)
    471      1.1  mrg {
    472      1.1  mrg   struct gmp_doscan_params_t  param;
    473      1.1  mrg   va_list     ap;
    474      1.1  mrg   char	      *alloc_fmt;
    475      1.1  mrg   const char  *fmt, *this_fmt, *end_fmt;
    476      1.1  mrg   size_t      orig_fmt_len, alloc_fmt_size, len;
    477      1.1  mrg   int	      new_fields, new_chars;
    478      1.1  mrg   char	      fchar;
    479      1.1  mrg   int	      fields = 0;
    480      1.1  mrg   int	      chars = 0;
    481      1.1  mrg 
    482      1.1  mrg   TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
    483      1.1  mrg 	 if (funs->scan == (gmp_doscan_scan_t) sscanf)
    484      1.1  mrg 	   printf ("  s=\"%s\"\n", * (const char **) data));
    485      1.1  mrg 
    486      1.1  mrg   /* Don't modify orig_ap, if va_list is actually an array and hence call by
    487      1.1  mrg      reference.  It could be argued that it'd be more efficient to leave
    488      1.1  mrg      callers to make a copy if they care, but doing so here is going to be a
    489      1.1  mrg      very small part of the total work, and we may as well keep applications
    490      1.1  mrg      out of trouble.  */
    491      1.1  mrg   va_copy (ap, orig_ap);
    492      1.1  mrg 
    493      1.1  mrg   /* Parts of the format string are going to be copied so that a " %n" can
    494      1.1  mrg      be appended.  alloc_fmt is some space for that.  orig_fmt_len+4 will be
    495      1.1  mrg      needed if fmt consists of a single "%" specifier, but otherwise is an
    496      1.1  mrg      overestimate.  We're not going to be very fast here, so use
    497      1.1  mrg      __gmp_allocate_func rather than TMP_ALLOC.  */
    498      1.1  mrg   orig_fmt_len = strlen (orig_fmt);
    499      1.1  mrg   alloc_fmt_size = orig_fmt_len + 4;
    500      1.1  mrg   alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
    501      1.1  mrg 
    502      1.1  mrg   fmt = orig_fmt;
    503      1.1  mrg   end_fmt = orig_fmt + orig_fmt_len;
    504      1.1  mrg 
    505      1.1  mrg   for (;;)
    506      1.1  mrg     {
    507      1.1  mrg     next:
    508      1.1  mrg       fchar = *fmt++;
    509      1.1  mrg 
    510      1.1  mrg       if (fchar == '\0')
    511      1.1  mrg 	break;
    512      1.1  mrg 
    513      1.1  mrg       if (isspace (fchar))
    514      1.1  mrg 	{
    515      1.1  mrg 	  chars += skip_white (funs, data);
    516      1.1  mrg 	  continue;
    517      1.1  mrg 	}
    518      1.1  mrg 
    519      1.1  mrg       if (fchar != '%')
    520      1.1  mrg 	{
    521      1.1  mrg 	  int  c;
    522      1.1  mrg 	literal:
    523      1.1  mrg 	  c = (funs->get) (data);
    524      1.1  mrg 	  if (c != fchar)
    525      1.1  mrg 	    {
    526      1.1  mrg 	      (funs->unget) (c, data);
    527      1.1  mrg 	      if (c == EOF)
    528      1.1  mrg 		{
    529      1.1  mrg 		eof_no_match:
    530      1.1  mrg 		  if (fields == 0)
    531      1.1  mrg 		    fields = EOF;
    532      1.1  mrg 		}
    533      1.1  mrg 	      goto done;
    534      1.1  mrg 	    }
    535      1.1  mrg 	  chars++;
    536      1.1  mrg 	  continue;
    537      1.1  mrg 	}
    538      1.1  mrg 
    539      1.1  mrg       param.type = '\0';
    540      1.1  mrg       param.base = 0;	 /* for e,f,g,i */
    541      1.1  mrg       param.ignore = 0;
    542      1.1  mrg       param.width = 0;
    543      1.1  mrg 
    544      1.1  mrg       this_fmt = fmt-1;
    545      1.1  mrg       TRACE (printf ("	this_fmt \"%s\"\n", this_fmt));
    546      1.1  mrg 
    547      1.1  mrg       for (;;)
    548      1.1  mrg 	{
    549      1.1  mrg 	  ASSERT (fmt <= end_fmt);
    550      1.1  mrg 
    551      1.1  mrg 	  fchar = *fmt++;
    552      1.1  mrg 	  switch (fchar) {
    553      1.1  mrg 
    554      1.1  mrg 	  case '\0':  /* unterminated % sequence */
    555      1.1  mrg 	    ASSERT (0);
    556      1.1  mrg 	    goto done;
    557      1.1  mrg 
    558      1.1  mrg 	  case '%':   /* literal % */
    559      1.1  mrg 	    goto literal;
    560      1.1  mrg 
    561      1.1  mrg 	  case '[':   /* character range */
    562      1.1  mrg 	    fchar = *fmt++;
    563      1.1  mrg 	    if (fchar == '^')
    564      1.1  mrg 	      fchar = *fmt++;
    565      1.1  mrg 	    /* ']' allowed as the first char (possibly after '^') */
    566      1.1  mrg 	    if (fchar == ']')
    567      1.1  mrg 	      fchar = *fmt++;
    568      1.1  mrg 	    for (;;)
    569      1.1  mrg 	      {
    570      1.1  mrg 		ASSERT (fmt <= end_fmt);
    571      1.1  mrg 		if (fchar == '\0')
    572      1.1  mrg 		  {
    573      1.1  mrg 		    /* unterminated % sequence */
    574      1.1  mrg 		    ASSERT (0);
    575      1.1  mrg 		    goto done;
    576      1.1  mrg 		  }
    577      1.1  mrg 		if (fchar == ']')
    578      1.1  mrg 		  break;
    579      1.1  mrg 		fchar = *fmt++;
    580      1.1  mrg 	      }
    581      1.1  mrg 	    /*FALLTHRU*/
    582      1.1  mrg 	  case 'c':   /* characters */
    583      1.1  mrg 	  case 's':   /* string of non-whitespace */
    584      1.1  mrg 	  case 'p':   /* pointer */
    585      1.1  mrg 	  libc_type:
    586      1.1  mrg 	    len = fmt - this_fmt;
    587      1.1  mrg 	    memcpy (alloc_fmt, this_fmt, len);
    588      1.1  mrg 	    alloc_fmt[len++] = '%';
    589      1.1  mrg 	    alloc_fmt[len++] = 'n';
    590      1.1  mrg 	    alloc_fmt[len] = '\0';
    591      1.1  mrg 
    592      1.1  mrg 	    TRACE (printf ("  scan \"%s\"\n", alloc_fmt);
    593      1.1  mrg 		   if (funs->scan == (gmp_doscan_scan_t) sscanf)
    594      1.1  mrg 		     printf ("	s=\"%s\"\n", * (const char **) data));
    595      1.1  mrg 
    596      1.1  mrg 	    new_chars = -1;
    597      1.1  mrg 	    if (param.ignore)
    598      1.1  mrg 	      {
    599      1.1  mrg 		new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
    600      1.1  mrg 		ASSERT (new_fields == 0 || new_fields == EOF);
    601      1.1  mrg 	      }
    602      1.1  mrg 	    else
    603      1.1  mrg 	      {
    604      1.1  mrg 		void *arg = va_arg (ap, void *);
    605      1.1  mrg 		new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
    606      1.1  mrg 		ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
    607      1.1  mrg 
    608      1.1  mrg 		if (new_fields == 0)
    609      1.1  mrg 		  goto done;  /* invalid input */
    610      1.1  mrg 
    611      1.1  mrg 		if (new_fields == 1)
    612      1.1  mrg 		  ASSERT (new_chars != -1);
    613      1.1  mrg 	      }
    614      1.1  mrg 	    TRACE (printf ("  new_fields %d   new_chars %d\n",
    615      1.1  mrg 			   new_fields, new_chars));
    616      1.1  mrg 
    617      1.1  mrg 	    if (new_fields == -1)
    618      1.1  mrg 	      goto eof_no_match;  /* EOF before anything matched */
    619      1.1  mrg 
    620      1.1  mrg 	    /* Under param.ignore, when new_fields==0 we don't know if
    621      1.1  mrg 	       it's a successful match or an invalid field.  new_chars
    622      1.1  mrg 	       won't have been assigned if it was an invalid field.  */
    623      1.1  mrg 	    if (new_chars == -1)
    624      1.1  mrg 	      goto done;  /* invalid input */
    625      1.1  mrg 
    626      1.1  mrg 	    chars += new_chars;
    627      1.1  mrg 	    (*funs->step) (data, new_chars);
    628      1.1  mrg 
    629      1.1  mrg 	  increment_fields:
    630      1.1  mrg 	    if (! param.ignore)
    631      1.1  mrg 	      fields++;
    632      1.1  mrg 	    goto next;
    633      1.1  mrg 
    634      1.1  mrg 	  case 'd':   /* decimal */
    635      1.1  mrg 	  case 'u':   /* decimal */
    636      1.1  mrg 	    param.base = 10;
    637      1.1  mrg 	    goto numeric;
    638      1.1  mrg 
    639      1.1  mrg 	  case 'e':   /* float */
    640      1.1  mrg 	  case 'E':   /* float */
    641      1.1  mrg 	  case 'f':   /* float */
    642      1.1  mrg 	  case 'g':   /* float */
    643      1.1  mrg 	  case 'G':   /* float */
    644      1.1  mrg 	  case 'i':   /* integer with base marker */
    645      1.1  mrg 	  numeric:
    646      1.1  mrg 	    if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
    647      1.1  mrg 	      goto libc_type;
    648      1.1  mrg 
    649      1.1  mrg 	    chars += skip_white (funs, data);
    650      1.1  mrg 
    651      1.1  mrg 	    new_chars = gmpscan (funs, data, &param,
    652      1.1  mrg 				 param.ignore ? NULL : va_arg (ap, void*));
    653      1.1  mrg 	    if (new_chars == -2)
    654      1.1  mrg 	      goto eof_no_match;
    655      1.1  mrg 	    if (new_chars == -1)
    656      1.1  mrg 	      goto done;
    657      1.1  mrg 
    658      1.1  mrg 	    ASSERT (new_chars >= 0);
    659      1.1  mrg 	    chars += new_chars;
    660      1.1  mrg 	    goto increment_fields;
    661      1.1  mrg 
    662      1.1  mrg 	  case 'a':   /* glibc allocate string */
    663      1.1  mrg 	  case '\'':  /* glibc digit groupings */
    664      1.1  mrg 	    break;
    665      1.1  mrg 
    666      1.1  mrg 	  case 'F':   /* mpf_t */
    667      1.1  mrg 	  case 'j':   /* intmax_t */
    668      1.1  mrg 	  case 'L':   /* long long */
    669      1.1  mrg 	  case 'q':   /* quad_t */
    670      1.1  mrg 	  case 'Q':   /* mpq_t */
    671      1.1  mrg 	  case 't':   /* ptrdiff_t */
    672      1.1  mrg 	  case 'z':   /* size_t */
    673      1.1  mrg 	  case 'Z':   /* mpz_t */
    674      1.1  mrg 	  set_type:
    675      1.1  mrg 	    param.type = fchar;
    676      1.1  mrg 	    break;
    677      1.1  mrg 
    678      1.1  mrg 	  case 'h':   /* short or char */
    679      1.1  mrg 	    if (param.type != 'h')
    680      1.1  mrg 	      goto set_type;
    681      1.1  mrg 	    param.type = 'H';	/* internal code for "hh" */
    682      1.1  mrg 	    break;
    683      1.1  mrg 
    684      1.1  mrg 	    goto numeric;
    685      1.1  mrg 
    686      1.1  mrg 	  case 'l':   /* long, long long, double or long double */
    687      1.1  mrg 	    if (param.type != 'l')
    688      1.1  mrg 	      goto set_type;
    689      1.1  mrg 	    param.type = 'L';	/* "ll" means "L" */
    690      1.1  mrg 	    break;
    691      1.1  mrg 
    692      1.1  mrg 	  case 'n':
    693      1.1  mrg 	    if (! param.ignore)
    694      1.1  mrg 	      {
    695      1.1  mrg 		void  *p;
    696      1.1  mrg 		p = va_arg (ap, void *);
    697      1.1  mrg 		TRACE (printf ("  store %%n to %p\n", p));
    698      1.1  mrg 		switch (param.type) {
    699      1.1  mrg 		case '\0': * (int	*) p = chars; break;
    700      1.1  mrg 		case 'F':  mpf_set_si ((mpf_ptr) p, (long) chars); break;
    701      1.1  mrg 		case 'H':  * (char	*) p = chars; break;
    702      1.1  mrg 		case 'h':  * (short	*) p = chars; break;
    703      1.1  mrg #if HAVE_INTMAX_T
    704      1.1  mrg 		case 'j':  * (intmax_t	*) p = chars; break;
    705      1.1  mrg #else
    706      1.1  mrg 		case 'j':  ASSERT_FAIL (intmax_t not available); break;
    707      1.1  mrg #endif
    708      1.1  mrg 		case 'l':  * (long	*) p = chars; break;
    709      1.1  mrg #if HAVE_QUAD_T && HAVE_LONG_LONG
    710      1.1  mrg 		case 'q':
    711      1.1  mrg 		  ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
    712      1.1  mrg 		  /*FALLTHRU*/
    713      1.1  mrg #else
    714      1.1  mrg 		case 'q':  ASSERT_FAIL (quad_t not available); break;
    715      1.1  mrg #endif
    716      1.1  mrg #if HAVE_LONG_LONG
    717      1.1  mrg 		case 'L':  * (long long *) p = chars; break;
    718      1.1  mrg #else
    719      1.1  mrg 		case 'L':  ASSERT_FAIL (long long not available); break;
    720      1.1  mrg #endif
    721      1.1  mrg 		case 'Q':  mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
    722      1.1  mrg #if HAVE_PTRDIFF_T
    723      1.1  mrg 		case 't':  * (ptrdiff_t *) p = chars; break;
    724      1.1  mrg #else
    725      1.1  mrg 		case 't':  ASSERT_FAIL (ptrdiff_t not available); break;
    726      1.1  mrg #endif
    727      1.1  mrg 		case 'z':  * (size_t	*) p = chars; break;
    728      1.1  mrg 		case 'Z':  mpz_set_si ((mpz_ptr) p, (long) chars); break;
    729      1.1  mrg 		default: ASSERT (0); break;
    730      1.1  mrg 		}
    731      1.1  mrg 	      }
    732      1.1  mrg 	    goto next;
    733      1.1  mrg 
    734      1.1  mrg 	  case 'o':
    735      1.1  mrg 	    param.base = 8;
    736      1.1  mrg 	    goto numeric;
    737      1.1  mrg 
    738      1.1  mrg 	  case 'x':
    739      1.1  mrg 	  case 'X':
    740      1.1  mrg 	    param.base = 16;
    741      1.1  mrg 	    goto numeric;
    742      1.1  mrg 
    743      1.1  mrg 	  case '0': case '1': case '2': case '3': case '4':
    744      1.1  mrg 	  case '5': case '6': case '7': case '8': case '9':
    745      1.1  mrg 	    param.width = 0;
    746      1.1  mrg 	    do {
    747      1.1  mrg 	      param.width = param.width * 10 + (fchar-'0');
    748      1.1  mrg 	      fchar = *fmt++;
    749      1.1  mrg 	    } while (isdigit (fchar));
    750      1.1  mrg 	    fmt--; /* unget the non-digit */
    751      1.1  mrg 	    break;
    752      1.1  mrg 
    753      1.1  mrg 	  case '*':
    754      1.1  mrg 	    param.ignore = 1;
    755      1.1  mrg 	    break;
    756      1.1  mrg 
    757      1.1  mrg 	  default:
    758      1.1  mrg 	    /* something invalid in a % sequence */
    759      1.1  mrg 	    ASSERT (0);
    760      1.1  mrg 	    goto next;
    761      1.1  mrg 	  }
    762      1.1  mrg 	}
    763      1.1  mrg     }
    764      1.1  mrg 
    765      1.1  mrg  done:
    766      1.1  mrg   (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
    767      1.1  mrg   return fields;
    768      1.1  mrg }
    769