Home | History | Annotate | Line # | Download | only in scanf
      1      1.1  mrg /* __gmp_doscan -- formatted input internals.
      2      1.1  mrg 
      3      1.1  mrg    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
      4      1.1  mrg    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
      5      1.1  mrg    FUTURE GNU MP RELEASES.
      6      1.1  mrg 
      7  1.1.1.2  mrg Copyright 2001-2003 Free Software Foundation, Inc.
      8      1.1  mrg 
      9      1.1  mrg This file is part of the GNU MP Library.
     10      1.1  mrg 
     11      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     12  1.1.1.2  mrg it under the terms of either:
     13  1.1.1.2  mrg 
     14  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     15  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     16  1.1.1.2  mrg     option) any later version.
     17  1.1.1.2  mrg 
     18  1.1.1.2  mrg or
     19  1.1.1.2  mrg 
     20  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     21  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     22  1.1.1.2  mrg     later version.
     23  1.1.1.2  mrg 
     24  1.1.1.2  mrg or both in parallel, as here.
     25      1.1  mrg 
     26      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     27      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     28  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     29  1.1.1.2  mrg for more details.
     30      1.1  mrg 
     31  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     32  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     33  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     34      1.1  mrg 
     35      1.1  mrg #define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
     36      1.1  mrg 
     37  1.1.1.2  mrg #include "config.h"	/* needed for the HAVE_, could also move gmp incls */
     38      1.1  mrg 
     39      1.1  mrg #include <stdarg.h>
     40      1.1  mrg #include <ctype.h>
     41      1.1  mrg #include <stddef.h>    /* for ptrdiff_t */
     42      1.1  mrg #include <stdio.h>
     43      1.1  mrg #include <stdlib.h>    /* for strtol */
     44      1.1  mrg #include <string.h>
     45      1.1  mrg 
     46      1.1  mrg #if HAVE_LANGINFO_H
     47      1.1  mrg #include <langinfo.h>  /* for nl_langinfo */
     48      1.1  mrg #endif
     49      1.1  mrg 
     50      1.1  mrg #if HAVE_LOCALE_H
     51      1.1  mrg #include <locale.h>    /* for localeconv */
     52      1.1  mrg #endif
     53      1.1  mrg 
     54      1.1  mrg #if HAVE_INTTYPES_H
     55      1.1  mrg # include <inttypes.h> /* for intmax_t */
     56      1.1  mrg #else
     57      1.1  mrg # if HAVE_STDINT_H
     58      1.1  mrg #  include <stdint.h>
     59      1.1  mrg # endif
     60      1.1  mrg #endif
     61      1.1  mrg 
     62      1.1  mrg #if HAVE_SYS_TYPES_H
     63      1.1  mrg #include <sys/types.h> /* for quad_t */
     64      1.1  mrg #endif
     65      1.1  mrg 
     66      1.1  mrg #include "gmp-impl.h"
     67      1.1  mrg 
     68      1.1  mrg 
     69      1.1  mrg /* Change this to "#define TRACE(x) x" for some traces. */
     70      1.1  mrg #define TRACE(x)
     71      1.1  mrg 
     72      1.1  mrg 
     73      1.1  mrg /* General:
     74      1.1  mrg 
     75      1.1  mrg        It's necessary to parse up the format string to recognise the GMP
     76      1.1  mrg        extra types F, Q and Z.  Other types and conversions are passed
     77      1.1  mrg        across to the standard sscanf or fscanf via funs->scan, for ease of
     78      1.1  mrg        implementation.  This is essential in the case of something like glibc
     79      1.1  mrg        %p where the pointer format isn't actually documented.
     80      1.1  mrg 
     81      1.1  mrg        Because funs->scan doesn't get the whole input it can't put the right
     82      1.1  mrg        values in for %n, so that's handled in __gmp_doscan.  Neither sscanf
     83      1.1  mrg        nor fscanf directly indicate how many characters were read, so an
     84      1.1  mrg        extra %n is appended to each run for that.  For fscanf this merely
     85      1.1  mrg        supports our %n output, but for sscanf it lets funs->step move us
     86      1.1  mrg        along the input string.
     87      1.1  mrg 
     88      1.1  mrg        Whitespace and literal matches in the format string, including %%,
     89      1.1  mrg        are handled directly within __gmp_doscan.  This is reasonably
     90      1.1  mrg        efficient, and avoids some suspicious behaviour observed in various
     91      1.1  mrg        system libc's.  GLIBC 2.2.4 for instance returns 0 on
     92      1.1  mrg 
     93      1.1  mrg 	   sscanf(" ", " x")
     94      1.1  mrg        or
     95      1.1  mrg 	   sscanf(" ", " x%d",&n)
     96      1.1  mrg 
     97      1.1  mrg        whereas we think they should return EOF, since end-of-string is
     98      1.1  mrg        reached when a match of "x" is required.
     99      1.1  mrg 
    100      1.1  mrg        For standard % conversions, funs->scan is called once for each
    101      1.1  mrg        conversion.  If we had vfscanf and vsscanf and could rely on their
    102      1.1  mrg        fixed text matching behaviour then we could call them with multiple
    103      1.1  mrg        consecutive standard conversions.  But plain fscanf and sscanf work
    104      1.1  mrg        fine, and parsing one field at a time shouldn't be too much of a
    105      1.1  mrg        slowdown.
    106      1.1  mrg 
    107      1.1  mrg    gmpscan:
    108      1.1  mrg 
    109      1.1  mrg        gmpscan reads a gmp type.  It's only used from one place, but is a
    110      1.1  mrg        separate subroutine to avoid a big chunk of complicated code in the
    111      1.1  mrg        middle of __gmp_doscan.  Within gmpscan a couple of loopbacks make it
    112      1.1  mrg        possible to share code for parsing integers, rationals and floats.
    113      1.1  mrg 
    114      1.1  mrg        In gmpscan normally one char of lookahead is maintained, but when width
    115      1.1  mrg        is reached that stops, on the principle that an fgetc/ungetc of a char
    116      1.1  mrg        past where we're told to stop would be undesirable.  "chars" is how many
    117      1.1  mrg        characters have been read so far, including the current c.  When
    118      1.1  mrg        chars==width and another character is desired then a jump is done to the
    119      1.1  mrg        "convert" stage.  c is invalid and mustn't be unget'ed in this case;
    120      1.1  mrg        chars is set to width+1 to indicate that.
    121      1.1  mrg 
    122      1.1  mrg        gmpscan normally returns the number of characters read.  -1 means an
    123      1.1  mrg        invalid field, -2 means EOF reached before any matching characters
    124      1.1  mrg        were read.
    125      1.1  mrg 
    126      1.1  mrg        For hex floats, the mantissa part is passed to mpf_set_str, then the
     127      1.1  mrg        exponent is applied with mpf_mul_2exp or mpf_div_2exp.  This is easier
     128      1.1  mrg        than teaching mpf_set_str about an exponent factor (ie. 2) differing
     129      1.1  mrg        from the mantissa radix point factor (ie. 16).  mpf_mul_2exp and
    130      1.1  mrg        mpf_div_2exp will preserve the application requested precision, so
    131      1.1  mrg        nothing in that respect is lost by making this a two-step process.
    132      1.1  mrg 
    133      1.1  mrg    Matching and errors:
    134      1.1  mrg 
    135      1.1  mrg        C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
    136      1.1  mrg        string which is a match for the appropriate type, or a prefix of a
    137      1.1  mrg        match.  With that done, if it's only a prefix then the result is a
    138      1.1  mrg        matching failure, ie. invalid input.
    139      1.1  mrg 
    140      1.1  mrg        This rule seems fairly clear, but doesn't seem to be universally
    141      1.1  mrg        applied in system C libraries.  Even GLIBC doesn't seem to get it
    142      1.1  mrg        right, insofar as it seems to accept some apparently invalid forms.
    143      1.1  mrg        Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
    144      1.1  mrg        standard would suggest a non-empty sequence of digits should be
    145      1.1  mrg        required after an "0x".
    146      1.1  mrg 
    147      1.1  mrg        A footnote to 7.19.6.2 para 17 notes how this input item reading can
    148      1.1  mrg        mean inputs acceptable to strtol are not acceptable to fscanf.  We
    149      1.1  mrg        think this confirms our reading of "0x" as invalid.
    150      1.1  mrg 
    151      1.1  mrg        Clearly gmp_sscanf could backtrack to a longest input which was a
    152      1.1  mrg        valid match for a given item, but this is not done, since C99 says
    153      1.1  mrg        sscanf is identical to fscanf, so we make gmp_sscanf identical to
    154      1.1  mrg        gmp_fscanf.
    155      1.1  mrg 
    156      1.1  mrg    Types:
    157      1.1  mrg 
    158      1.1  mrg        C99 says "ll" is for long long, and "L" is for long double floats.
    159      1.1  mrg        Unfortunately in GMP 4.1.1 we documented the two as equivalent.  This
    160      1.1  mrg        doesn't affect us directly, since both are passed through to plain
    161      1.1  mrg        scanf.  It seems wisest not to try to enforce the C99 rule.  This is
    162      1.1  mrg        consistent with what we said before, though whether it actually
    163      1.1  mrg        worked was always up to the C library.
    164      1.1  mrg 
    165      1.1  mrg    Alternatives:
    166      1.1  mrg 
    167      1.1  mrg        Consideration was given to using separate code for gmp_fscanf and
    168      1.1  mrg        gmp_sscanf.  The sscanf case could zip across a string doing literal
    169      1.1  mrg        matches or recognising digits in gmpscan, rather than making a
    170      1.1  mrg        function call fun->get per character.  The fscanf could use getc
    171      1.1  mrg        rather than fgetc too, which might help those systems where getc is a
    172      1.1  mrg        macro or otherwise inlined.  But none of this scanning and converting
    173      1.1  mrg        will be particularly fast, so the two are done together to keep it a
    174      1.1  mrg        little simpler for now.
    175      1.1  mrg 
    176      1.1  mrg        Various multibyte string issues are not addressed, for a start C99
    177      1.1  mrg        scanf says the format string is multibyte.  Since we pass %c, %s and
    178      1.1  mrg        %[ to the system scanf, they might do multibyte reads already, but
    179      1.1  mrg        it's another matter whether or not that can be used, since our digit
    180      1.1  mrg        and whitespace parsing is only unibyte.  The plan is to quietly
    181      1.1  mrg        ignore multibyte locales for now.  This is not as bad as it sounds,
    182      1.1  mrg        since GMP is presumably used mostly on numbers, which can be
    183      1.1  mrg        perfectly adequately treated in plain ASCII.
    184      1.1  mrg 
    185      1.1  mrg */
    186      1.1  mrg 
    187      1.1  mrg 
/* Parsed form of one "%" conversion specification, filled in by
   __gmp_doscan and read by gmpscan.  */
struct gmp_doscan_params_t {
  /* Numeric base for the conversion; 0 asks gmpscan to work the base out
     from a leading "0" or "0x" in the input.  */
  int	base;

  /* Non-zero to parse the field without storing a result.  */
  int	ignore;

  /* Type modifier character; gmpscan handles only 'F', 'Q' and 'Z'.  */
  char	type;

  /* Maximum field width in characters, 0 meaning no limit given.  */
  int	width;
};
    194      1.1  mrg 
    195      1.1  mrg 
/* Fetch the next input character into c, counting it in "chars".

   For use inside gmpscan only: relies on the locals chars, width, funs and
   data, and on the label "convert".  Once "width" characters have already
   been read nothing more is fetched; instead chars is left at width+1, c
   is left unchanged, and control jumps to "convert".  */
#define GET(c)					\
  do {						\
    ASSERT (chars <= width);			\
    if (++chars > width)			\
      goto convert;				\
    (c) = (*funs->get) (data);			\
  } while (0)
    204      1.1  mrg 
/* Number of bytes by which the "s" accumulation buffer is grown (and its
   initial size in gmpscan).  */
#define S_ALLOC_STEP  512

/* Append character c to the buffer "s", extending it by S_ALLOC_STEP bytes
   first if it's full.

   For use inside gmpscan only: relies on the locals s (the buffer), s_upto
   (number of bytes used) and s_alloc (number of bytes allocated).  The
   argument is parenthesized in the expansion, the same as in GET.  */
#define STORE(c)							\
  do {									\
    ASSERT (s_upto <= s_alloc);						\
    if (s_upto >= s_alloc)						\
      {									\
	size_t	s_alloc_new = s_alloc + S_ALLOC_STEP;			\
	s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
	s_alloc = s_alloc_new;						\
      }									\
    s[s_upto++] = (c);							\
  } while (0)
    219      1.1  mrg 
    220      1.1  mrg static int
    221      1.1  mrg gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
    222      1.1  mrg 	 const struct gmp_doscan_params_t *p, void *dst)
    223      1.1  mrg {
    224      1.1  mrg   int	  chars, c, base, first, width, seen_point, seen_digit, hexfloat;
    225      1.1  mrg   size_t  s_upto, s_alloc, hexexp;
    226      1.1  mrg   char	  *s;
    227      1.1  mrg   int	  invalid = 0;
    228      1.1  mrg 
    229      1.1  mrg   TRACE (printf ("gmpscan\n"));
    230      1.1  mrg 
    231      1.1  mrg   ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
    232      1.1  mrg 
    233      1.1  mrg   c = (*funs->get) (data);
    234      1.1  mrg   if (c == EOF)
    235      1.1  mrg     return -2;
    236      1.1  mrg 
    237      1.1  mrg   chars = 1;
    238      1.1  mrg   first = 1;
    239      1.1  mrg   seen_point = 0;
    240      1.1  mrg   width = (p->width == 0 ? INT_MAX-1 : p->width);
    241      1.1  mrg   base = p->base;
    242      1.1  mrg   s_alloc = S_ALLOC_STEP;
    243      1.1  mrg   s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
    244      1.1  mrg   s_upto = 0;
    245      1.1  mrg   hexfloat = 0;
    246      1.1  mrg   hexexp = 0;
    247      1.1  mrg 
    248      1.1  mrg  another:
    249      1.1  mrg   seen_digit = 0;
    250      1.1  mrg   if (c == '-')
    251      1.1  mrg     {
    252      1.1  mrg       STORE (c);
    253      1.1  mrg       goto get_for_sign;
    254      1.1  mrg     }
    255      1.1  mrg   else if (c == '+')
    256      1.1  mrg     {
    257      1.1  mrg       /* don't store '+', it's not accepted by mpz_set_str etc */
    258      1.1  mrg     get_for_sign:
    259      1.1  mrg       GET (c);
    260      1.1  mrg     }
    261      1.1  mrg 
    262      1.1  mrg   if (base == 0)
    263      1.1  mrg     {
    264      1.1  mrg       base = 10;		  /* decimal if no base indicator */
    265      1.1  mrg       if (c == '0')
    266      1.1  mrg 	{
    267      1.1  mrg 	  seen_digit = 1;	  /* 0 alone is a valid number */
    268      1.1  mrg 	  if (p->type != 'F')
    269      1.1  mrg 	    base = 8;		  /* leading 0 is octal, for non-floats */
    270      1.1  mrg 	  STORE (c);
    271      1.1  mrg 	  GET (c);
    272      1.1  mrg 	  if (c == 'x' || c == 'X')
    273      1.1  mrg 	    {
    274      1.1  mrg 	      base = 16;
    275      1.1  mrg 	      seen_digit = 0;	  /* must have digits after an 0x */
    276      1.1  mrg 	      if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
    277      1.1  mrg 		hexfloat = 1;
    278      1.1  mrg 	      else
    279      1.1  mrg 		STORE (c);
    280      1.1  mrg 	      GET (c);
    281      1.1  mrg 	    }
    282      1.1  mrg 	}
    283      1.1  mrg     }
    284      1.1  mrg 
    285      1.1  mrg  digits:
    286      1.1  mrg   for (;;)
    287      1.1  mrg     {
    288      1.1  mrg       if (base == 16)
    289      1.1  mrg 	{
    290      1.1  mrg 	  if (! isxdigit (c))
    291      1.1  mrg 	    break;
    292      1.1  mrg 	}
    293      1.1  mrg       else
    294      1.1  mrg 	{
    295      1.1  mrg 	  if (! isdigit (c))
    296      1.1  mrg 	    break;
    297      1.1  mrg 	  if (base == 8 && (c == '8' || c == '9'))
    298      1.1  mrg 	    break;
    299      1.1  mrg 	}
    300      1.1  mrg 
    301      1.1  mrg       seen_digit = 1;
    302      1.1  mrg       STORE (c);
    303      1.1  mrg       GET (c);
    304      1.1  mrg     }
    305      1.1  mrg 
    306      1.1  mrg   if (first)
    307      1.1  mrg     {
    308      1.1  mrg       /* decimal point */
    309      1.1  mrg       if (p->type == 'F' && ! seen_point)
    310      1.1  mrg 	{
    311      1.1  mrg 	  /* For a multi-character decimal point, if the first character is
    312      1.1  mrg 	     present then all of it must be, otherwise the input is
    313      1.1  mrg 	     considered invalid.  */
    314      1.1  mrg 	  const char  *point = GMP_DECIMAL_POINT;
    315      1.1  mrg 	  int	      pc = (unsigned char) *point++;
    316      1.1  mrg 	  if (c == pc)
    317      1.1  mrg 	    {
    318      1.1  mrg 	      for (;;)
    319      1.1  mrg 		{
    320      1.1  mrg 		  STORE (c);
    321      1.1  mrg 		  GET (c);
    322      1.1  mrg 		  pc = (unsigned char) *point++;
    323      1.1  mrg 		  if (pc == '\0')
    324      1.1  mrg 		    break;
    325      1.1  mrg 		  if (c != pc)
    326      1.1  mrg 		    goto set_invalid;
    327      1.1  mrg 		}
    328      1.1  mrg 	      seen_point = 1;
    329      1.1  mrg 	      goto digits;
    330      1.1  mrg 	    }
    331      1.1  mrg 	}
    332      1.1  mrg 
    333      1.1  mrg       /* exponent */
    334      1.1  mrg       if (p->type == 'F')
    335      1.1  mrg 	{
    336      1.1  mrg 	  if (hexfloat && (c == 'p' || c == 'P'))
    337      1.1  mrg 	    {
    338      1.1  mrg 	      hexexp = s_upto; /* exponent location */
    339      1.1  mrg 	      base = 10;       /* exponent in decimal */
    340      1.1  mrg 	      goto exponent;
    341      1.1  mrg 	    }
    342      1.1  mrg 	  else if (! hexfloat && (c == 'e' || c == 'E'))
    343      1.1  mrg 	    {
    344      1.1  mrg 	    exponent:
    345      1.1  mrg 	      /* must have at least one digit in the mantissa, just an exponent
    346      1.1  mrg 		 is not good enough */
    347      1.1  mrg 	      if (! seen_digit)
    348      1.1  mrg 		goto set_invalid;
    349      1.1  mrg 
    350      1.1  mrg 	    do_second:
    351      1.1  mrg 	      first = 0;
    352      1.1  mrg 	      STORE (c);
    353      1.1  mrg 	      GET (c);
    354      1.1  mrg 	      goto another;
    355      1.1  mrg 	    }
    356      1.1  mrg 	}
    357      1.1  mrg 
    358      1.1  mrg       /* denominator */
    359      1.1  mrg       if (p->type == 'Q' && c == '/')
    360      1.1  mrg 	{
    361      1.1  mrg 	  /* must have at least one digit in the numerator */
    362      1.1  mrg 	  if (! seen_digit)
    363      1.1  mrg 	    goto set_invalid;
    364      1.1  mrg 
    365      1.1  mrg 	  /* now look for at least one digit in the denominator */
    366      1.1  mrg 	  seen_digit = 0;
    367      1.1  mrg 
    368      1.1  mrg 	  /* allow the base to be redetermined for "%i" */
    369      1.1  mrg 	  base = p->base;
    370      1.1  mrg 	  goto do_second;
    371      1.1  mrg 	}
    372      1.1  mrg     }
    373      1.1  mrg 
    374      1.1  mrg  convert:
    375      1.1  mrg   if (! seen_digit)
    376      1.1  mrg     {
    377      1.1  mrg     set_invalid:
    378      1.1  mrg       invalid = 1;
    379      1.1  mrg       goto done;
    380      1.1  mrg     }
    381      1.1  mrg 
    382      1.1  mrg   if (! p->ignore)
    383      1.1  mrg     {
    384      1.1  mrg       STORE ('\0');
    385      1.1  mrg       TRACE (printf ("	convert \"%s\"\n", s));
    386      1.1  mrg 
    387      1.1  mrg       /* We ought to have parsed out a valid string above, so just test
    388      1.1  mrg 	 mpz_set_str etc with an ASSERT.  */
    389      1.1  mrg       switch (p->type) {
    390      1.1  mrg       case 'F':
    391      1.1  mrg 	{
    392      1.1  mrg 	  mpf_ptr  f = (mpf_ptr) dst;
    393      1.1  mrg 	  if (hexexp != 0)
    394      1.1  mrg 	    s[hexexp] = '\0';
    395      1.1  mrg 	  ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
    396      1.1  mrg 	  if (hexexp != 0)
    397      1.1  mrg 	    {
    398      1.1  mrg 	      char *dummy;
    399      1.1  mrg 	      long  exp;
    400      1.1  mrg 	      exp = strtol (s + hexexp + 1, &dummy, 10);
    401      1.1  mrg 	      if (exp >= 0)
    402      1.1  mrg 		mpf_mul_2exp (f, f, (unsigned long) exp);
    403      1.1  mrg 	      else
    404  1.1.1.3  mrg 		mpf_div_2exp (f, f, NEG_CAST (unsigned long, exp));
    405      1.1  mrg 	    }
    406      1.1  mrg 	}
    407      1.1  mrg 	break;
    408      1.1  mrg       case 'Q':
    409      1.1  mrg 	ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
    410      1.1  mrg 	break;
    411      1.1  mrg       case 'Z':
    412      1.1  mrg 	ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
    413      1.1  mrg 	break;
    414      1.1  mrg       default:
    415      1.1  mrg 	ASSERT (0);
    416      1.1  mrg 	/*FALLTHRU*/
    417      1.1  mrg 	break;
    418      1.1  mrg       }
    419      1.1  mrg     }
    420      1.1  mrg 
    421      1.1  mrg  done:
    422      1.1  mrg   ASSERT (chars <= width+1);
    423      1.1  mrg   if (chars != width+1)
    424      1.1  mrg     {
    425      1.1  mrg       (*funs->unget) (c, data);
    426      1.1  mrg       TRACE (printf ("	ungetc %d, to give %d chars\n", c, chars-1));
    427      1.1  mrg     }
    428      1.1  mrg   chars--;
    429      1.1  mrg 
    430      1.1  mrg   (*__gmp_free_func) (s, s_alloc);
    431      1.1  mrg 
    432      1.1  mrg   if (invalid)
    433      1.1  mrg     {
    434      1.1  mrg       TRACE (printf ("	invalid\n"));
    435      1.1  mrg       return -1;
    436      1.1  mrg     }
    437      1.1  mrg 
    438      1.1  mrg   TRACE (printf ("  return %d chars (cf width %d)\n", chars, width));
    439      1.1  mrg   return chars;
    440      1.1  mrg }
    441      1.1  mrg 
    442      1.1  mrg 
    443      1.1  mrg /* Read and discard whitespace, if any.  Return number of chars skipped.
    444      1.1  mrg    Whitespace skipping never provokes the EOF return from __gmp_doscan, so
    445      1.1  mrg    it's not necessary to watch for EOF from funs->get, */
    446      1.1  mrg static int
    447      1.1  mrg skip_white (const struct gmp_doscan_funs_t *funs, void *data)
    448      1.1  mrg {
    449      1.1  mrg   int  c;
    450      1.1  mrg   int  ret = 0;
    451      1.1  mrg 
    452      1.1  mrg   do
    453      1.1  mrg     {
    454      1.1  mrg       c = (funs->get) (data);
    455      1.1  mrg       ret++;
    456      1.1  mrg     }
    457      1.1  mrg   while (isspace (c));
    458      1.1  mrg 
    459      1.1  mrg   (funs->unget) (c, data);
    460      1.1  mrg   ret--;
    461      1.1  mrg 
    462      1.1  mrg   TRACE (printf ("  skip white %d\n", ret));
    463      1.1  mrg   return ret;
    464      1.1  mrg }
    465      1.1  mrg 
    466      1.1  mrg 
    467      1.1  mrg int
    468      1.1  mrg __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
    469      1.1  mrg 	      const char *orig_fmt, va_list orig_ap)
    470      1.1  mrg {
    471      1.1  mrg   struct gmp_doscan_params_t  param;
    472      1.1  mrg   va_list     ap;
    473      1.1  mrg   char	      *alloc_fmt;
    474      1.1  mrg   const char  *fmt, *this_fmt, *end_fmt;
    475      1.1  mrg   size_t      orig_fmt_len, alloc_fmt_size, len;
    476      1.1  mrg   int	      new_fields, new_chars;
    477      1.1  mrg   char	      fchar;
    478      1.1  mrg   int	      fields = 0;
    479      1.1  mrg   int	      chars = 0;
    480      1.1  mrg 
    481      1.1  mrg   TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
    482      1.1  mrg 	 if (funs->scan == (gmp_doscan_scan_t) sscanf)
    483      1.1  mrg 	   printf ("  s=\"%s\"\n", * (const char **) data));
    484      1.1  mrg 
    485      1.1  mrg   /* Don't modify orig_ap, if va_list is actually an array and hence call by
    486      1.1  mrg      reference.  It could be argued that it'd be more efficient to leave
    487      1.1  mrg      callers to make a copy if they care, but doing so here is going to be a
    488      1.1  mrg      very small part of the total work, and we may as well keep applications
    489      1.1  mrg      out of trouble.  */
    490      1.1  mrg   va_copy (ap, orig_ap);
    491      1.1  mrg 
    492      1.1  mrg   /* Parts of the format string are going to be copied so that a " %n" can
    493      1.1  mrg      be appended.  alloc_fmt is some space for that.  orig_fmt_len+4 will be
    494      1.1  mrg      needed if fmt consists of a single "%" specifier, but otherwise is an
    495      1.1  mrg      overestimate.  We're not going to be very fast here, so use
    496      1.1  mrg      __gmp_allocate_func rather than TMP_ALLOC.  */
    497      1.1  mrg   orig_fmt_len = strlen (orig_fmt);
    498      1.1  mrg   alloc_fmt_size = orig_fmt_len + 4;
    499      1.1  mrg   alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
    500      1.1  mrg 
    501      1.1  mrg   fmt = orig_fmt;
    502      1.1  mrg   end_fmt = orig_fmt + orig_fmt_len;
    503      1.1  mrg 
    504      1.1  mrg   for (;;)
    505      1.1  mrg     {
    506      1.1  mrg     next:
    507      1.1  mrg       fchar = *fmt++;
    508      1.1  mrg 
    509      1.1  mrg       if (fchar == '\0')
    510      1.1  mrg 	break;
    511      1.1  mrg 
    512      1.1  mrg       if (isspace (fchar))
    513      1.1  mrg 	{
    514      1.1  mrg 	  chars += skip_white (funs, data);
    515      1.1  mrg 	  continue;
    516      1.1  mrg 	}
    517      1.1  mrg 
    518      1.1  mrg       if (fchar != '%')
    519      1.1  mrg 	{
    520      1.1  mrg 	  int  c;
    521      1.1  mrg 	literal:
    522      1.1  mrg 	  c = (funs->get) (data);
    523      1.1  mrg 	  if (c != fchar)
    524      1.1  mrg 	    {
    525      1.1  mrg 	      (funs->unget) (c, data);
    526      1.1  mrg 	      if (c == EOF)
    527      1.1  mrg 		{
    528      1.1  mrg 		eof_no_match:
    529      1.1  mrg 		  if (fields == 0)
    530      1.1  mrg 		    fields = EOF;
    531      1.1  mrg 		}
    532      1.1  mrg 	      goto done;
    533      1.1  mrg 	    }
    534      1.1  mrg 	  chars++;
    535      1.1  mrg 	  continue;
    536      1.1  mrg 	}
    537      1.1  mrg 
    538      1.1  mrg       param.type = '\0';
    539      1.1  mrg       param.base = 0;	 /* for e,f,g,i */
    540      1.1  mrg       param.ignore = 0;
    541      1.1  mrg       param.width = 0;
    542      1.1  mrg 
    543      1.1  mrg       this_fmt = fmt-1;
    544      1.1  mrg       TRACE (printf ("	this_fmt \"%s\"\n", this_fmt));
    545      1.1  mrg 
    546      1.1  mrg       for (;;)
    547      1.1  mrg 	{
    548      1.1  mrg 	  ASSERT (fmt <= end_fmt);
    549      1.1  mrg 
    550      1.1  mrg 	  fchar = *fmt++;
    551      1.1  mrg 	  switch (fchar) {
    552      1.1  mrg 
    553      1.1  mrg 	  case '\0':  /* unterminated % sequence */
    554      1.1  mrg 	    ASSERT (0);
    555      1.1  mrg 	    goto done;
    556      1.1  mrg 
    557      1.1  mrg 	  case '%':   /* literal % */
    558      1.1  mrg 	    goto literal;
    559      1.1  mrg 
    560      1.1  mrg 	  case '[':   /* character range */
    561      1.1  mrg 	    fchar = *fmt++;
    562      1.1  mrg 	    if (fchar == '^')
    563      1.1  mrg 	      fchar = *fmt++;
    564      1.1  mrg 	    /* ']' allowed as the first char (possibly after '^') */
    565      1.1  mrg 	    if (fchar == ']')
    566      1.1  mrg 	      fchar = *fmt++;
    567      1.1  mrg 	    for (;;)
    568      1.1  mrg 	      {
    569      1.1  mrg 		ASSERT (fmt <= end_fmt);
    570      1.1  mrg 		if (fchar == '\0')
    571      1.1  mrg 		  {
    572      1.1  mrg 		    /* unterminated % sequence */
    573      1.1  mrg 		    ASSERT (0);
    574      1.1  mrg 		    goto done;
    575      1.1  mrg 		  }
    576      1.1  mrg 		if (fchar == ']')
    577      1.1  mrg 		  break;
    578      1.1  mrg 		fchar = *fmt++;
    579      1.1  mrg 	      }
    580      1.1  mrg 	    /*FALLTHRU*/
    581      1.1  mrg 	  case 'c':   /* characters */
    582      1.1  mrg 	  case 's':   /* string of non-whitespace */
    583      1.1  mrg 	  case 'p':   /* pointer */
    584      1.1  mrg 	  libc_type:
    585      1.1  mrg 	    len = fmt - this_fmt;
    586      1.1  mrg 	    memcpy (alloc_fmt, this_fmt, len);
    587      1.1  mrg 	    alloc_fmt[len++] = '%';
    588      1.1  mrg 	    alloc_fmt[len++] = 'n';
    589      1.1  mrg 	    alloc_fmt[len] = '\0';
    590      1.1  mrg 
    591      1.1  mrg 	    TRACE (printf ("  scan \"%s\"\n", alloc_fmt);
    592      1.1  mrg 		   if (funs->scan == (gmp_doscan_scan_t) sscanf)
    593      1.1  mrg 		     printf ("	s=\"%s\"\n", * (const char **) data));
    594      1.1  mrg 
    595      1.1  mrg 	    new_chars = -1;
    596      1.1  mrg 	    if (param.ignore)
    597      1.1  mrg 	      {
    598      1.1  mrg 		new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
    599      1.1  mrg 		ASSERT (new_fields == 0 || new_fields == EOF);
    600      1.1  mrg 	      }
    601      1.1  mrg 	    else
    602      1.1  mrg 	      {
    603      1.1  mrg 		void *arg = va_arg (ap, void *);
    604      1.1  mrg 		new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
    605      1.1  mrg 		ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
    606      1.1  mrg 
    607      1.1  mrg 		if (new_fields == 0)
    608      1.1  mrg 		  goto done;  /* invalid input */
    609      1.1  mrg 
    610      1.1  mrg 		if (new_fields == 1)
    611      1.1  mrg 		  ASSERT (new_chars != -1);
    612      1.1  mrg 	      }
    613      1.1  mrg 	    TRACE (printf ("  new_fields %d   new_chars %d\n",
    614      1.1  mrg 			   new_fields, new_chars));
    615      1.1  mrg 
    616      1.1  mrg 	    if (new_fields == -1)
    617      1.1  mrg 	      goto eof_no_match;  /* EOF before anything matched */
    618      1.1  mrg 
    619      1.1  mrg 	    /* Under param.ignore, when new_fields==0 we don't know if
    620      1.1  mrg 	       it's a successful match or an invalid field.  new_chars
    621      1.1  mrg 	       won't have been assigned if it was an invalid field.  */
    622      1.1  mrg 	    if (new_chars == -1)
    623      1.1  mrg 	      goto done;  /* invalid input */
    624      1.1  mrg 
    625      1.1  mrg 	    chars += new_chars;
    626      1.1  mrg 	    (*funs->step) (data, new_chars);
    627      1.1  mrg 
    628      1.1  mrg 	  increment_fields:
    629      1.1  mrg 	    if (! param.ignore)
    630      1.1  mrg 	      fields++;
    631      1.1  mrg 	    goto next;
    632      1.1  mrg 
    633      1.1  mrg 	  case 'd':   /* decimal */
    634      1.1  mrg 	  case 'u':   /* decimal */
    635      1.1  mrg 	    param.base = 10;
    636      1.1  mrg 	    goto numeric;
    637      1.1  mrg 
    638      1.1  mrg 	  case 'e':   /* float */
    639      1.1  mrg 	  case 'E':   /* float */
    640      1.1  mrg 	  case 'f':   /* float */
    641      1.1  mrg 	  case 'g':   /* float */
    642      1.1  mrg 	  case 'G':   /* float */
    643      1.1  mrg 	  case 'i':   /* integer with base marker */
    644      1.1  mrg 	  numeric:
    645      1.1  mrg 	    if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
    646      1.1  mrg 	      goto libc_type;
    647      1.1  mrg 
    648      1.1  mrg 	    chars += skip_white (funs, data);
    649      1.1  mrg 
    650      1.1  mrg 	    new_chars = gmpscan (funs, data, &param,
    651      1.1  mrg 				 param.ignore ? NULL : va_arg (ap, void*));
    652      1.1  mrg 	    if (new_chars == -2)
    653      1.1  mrg 	      goto eof_no_match;
    654      1.1  mrg 	    if (new_chars == -1)
    655      1.1  mrg 	      goto done;
    656      1.1  mrg 
    657      1.1  mrg 	    ASSERT (new_chars >= 0);
    658      1.1  mrg 	    chars += new_chars;
    659      1.1  mrg 	    goto increment_fields;
    660      1.1  mrg 
    661      1.1  mrg 	  case 'a':   /* glibc allocate string */
    662      1.1  mrg 	  case '\'':  /* glibc digit groupings */
    663      1.1  mrg 	    break;
    664      1.1  mrg 
    665      1.1  mrg 	  case 'F':   /* mpf_t */
    666      1.1  mrg 	  case 'j':   /* intmax_t */
    667      1.1  mrg 	  case 'L':   /* long long */
    668      1.1  mrg 	  case 'q':   /* quad_t */
    669      1.1  mrg 	  case 'Q':   /* mpq_t */
    670      1.1  mrg 	  case 't':   /* ptrdiff_t */
    671      1.1  mrg 	  case 'z':   /* size_t */
    672      1.1  mrg 	  case 'Z':   /* mpz_t */
    673      1.1  mrg 	  set_type:
    674      1.1  mrg 	    param.type = fchar;
    675      1.1  mrg 	    break;
    676      1.1  mrg 
    677      1.1  mrg 	  case 'h':   /* short or char */
    678      1.1  mrg 	    if (param.type != 'h')
    679      1.1  mrg 	      goto set_type;
    680      1.1  mrg 	    param.type = 'H';	/* internal code for "hh" */
    681      1.1  mrg 	    break;
    682      1.1  mrg 
    683      1.1  mrg 	    goto numeric;
    684      1.1  mrg 
    685      1.1  mrg 	  case 'l':   /* long, long long, double or long double */
    686      1.1  mrg 	    if (param.type != 'l')
    687      1.1  mrg 	      goto set_type;
    688      1.1  mrg 	    param.type = 'L';	/* "ll" means "L" */
    689      1.1  mrg 	    break;
    690      1.1  mrg 
    691      1.1  mrg 	  case 'n':
    692      1.1  mrg 	    if (! param.ignore)
    693      1.1  mrg 	      {
    694      1.1  mrg 		void  *p;
    695      1.1  mrg 		p = va_arg (ap, void *);
    696      1.1  mrg 		TRACE (printf ("  store %%n to %p\n", p));
    697      1.1  mrg 		switch (param.type) {
    698      1.1  mrg 		case '\0': * (int	*) p = chars; break;
    699      1.1  mrg 		case 'F':  mpf_set_si ((mpf_ptr) p, (long) chars); break;
    700      1.1  mrg 		case 'H':  * (char	*) p = chars; break;
    701      1.1  mrg 		case 'h':  * (short	*) p = chars; break;
    702      1.1  mrg #if HAVE_INTMAX_T
    703      1.1  mrg 		case 'j':  * (intmax_t	*) p = chars; break;
    704      1.1  mrg #else
    705      1.1  mrg 		case 'j':  ASSERT_FAIL (intmax_t not available); break;
    706      1.1  mrg #endif
    707      1.1  mrg 		case 'l':  * (long	*) p = chars; break;
    708      1.1  mrg #if HAVE_QUAD_T && HAVE_LONG_LONG
    709      1.1  mrg 		case 'q':
    710      1.1  mrg 		  ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
    711      1.1  mrg 		  /*FALLTHRU*/
    712      1.1  mrg #else
    713      1.1  mrg 		case 'q':  ASSERT_FAIL (quad_t not available); break;
    714      1.1  mrg #endif
    715      1.1  mrg #if HAVE_LONG_LONG
    716      1.1  mrg 		case 'L':  * (long long *) p = chars; break;
    717      1.1  mrg #else
    718      1.1  mrg 		case 'L':  ASSERT_FAIL (long long not available); break;
    719      1.1  mrg #endif
    720      1.1  mrg 		case 'Q':  mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
    721      1.1  mrg #if HAVE_PTRDIFF_T
    722      1.1  mrg 		case 't':  * (ptrdiff_t *) p = chars; break;
    723      1.1  mrg #else
    724      1.1  mrg 		case 't':  ASSERT_FAIL (ptrdiff_t not available); break;
    725      1.1  mrg #endif
    726      1.1  mrg 		case 'z':  * (size_t	*) p = chars; break;
    727      1.1  mrg 		case 'Z':  mpz_set_si ((mpz_ptr) p, (long) chars); break;
    728      1.1  mrg 		default: ASSERT (0); break;
    729      1.1  mrg 		}
    730      1.1  mrg 	      }
    731      1.1  mrg 	    goto next;
    732      1.1  mrg 
    733      1.1  mrg 	  case 'o':
    734      1.1  mrg 	    param.base = 8;
    735      1.1  mrg 	    goto numeric;
    736      1.1  mrg 
    737      1.1  mrg 	  case 'x':
    738      1.1  mrg 	  case 'X':
    739      1.1  mrg 	    param.base = 16;
    740      1.1  mrg 	    goto numeric;
    741      1.1  mrg 
    742      1.1  mrg 	  case '0': case '1': case '2': case '3': case '4':
    743      1.1  mrg 	  case '5': case '6': case '7': case '8': case '9':
    744      1.1  mrg 	    param.width = 0;
    745      1.1  mrg 	    do {
    746      1.1  mrg 	      param.width = param.width * 10 + (fchar-'0');
    747      1.1  mrg 	      fchar = *fmt++;
    748      1.1  mrg 	    } while (isdigit (fchar));
    749      1.1  mrg 	    fmt--; /* unget the non-digit */
    750      1.1  mrg 	    break;
    751      1.1  mrg 
    752      1.1  mrg 	  case '*':
    753      1.1  mrg 	    param.ignore = 1;
    754      1.1  mrg 	    break;
    755      1.1  mrg 
    756      1.1  mrg 	  default:
    757      1.1  mrg 	    /* something invalid in a % sequence */
    758      1.1  mrg 	    ASSERT (0);
    759      1.1  mrg 	    goto next;
    760      1.1  mrg 	  }
    761      1.1  mrg 	}
    762      1.1  mrg     }
    763      1.1  mrg 
    764      1.1  mrg  done:
    765      1.1  mrg   (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
    766      1.1  mrg   return fields;
    767      1.1  mrg }
    768