Home | History | Annotate | Line # | Download | only in mpf
set_str.c revision 1.1.1.1.8.1
      1 /* mpf_set_str (dest, string, base) -- Convert the string STRING
      2    in base BASE to a float in dest.  If BASE is zero, the leading characters
      3    of STRING is used to figure out the base.
      4 
      5 Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2007,
      6 2008, 2011 Free Software Foundation, Inc.
      7 
      8 This file is part of the GNU MP Library.
      9 
     10 The GNU MP Library is free software; you can redistribute it and/or modify
     11 it under the terms of the GNU Lesser General Public License as published by
     12 the Free Software Foundation; either version 3 of the License, or (at your
     13 option) any later version.
     14 
     15 The GNU MP Library is distributed in the hope that it will be useful, but
     16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     17 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     18 License for more details.
     19 
     20 You should have received a copy of the GNU Lesser General Public License
     21 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     22 
     23 /*
     24   This still needs work, as suggested by some FIXME comments.
     25   1. Don't depend on superfluous mantissa digits.
     26   2. Allocate temp space more cleverly.
     27   3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
     28 */
     29 
     30 #define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
     31 
     32 #include "config.h"
     33 
     34 #include <stdlib.h>
     35 #include <string.h>
     36 #include <ctype.h>
     37 
     38 #if HAVE_LANGINFO_H
     39 #include <langinfo.h>  /* for nl_langinfo */
     40 #endif
     41 
     42 #if HAVE_LOCALE_H
     43 #include <locale.h>    /* for localeconv */
     44 #endif
     45 
     46 #include "gmp.h"
     47 #include "gmp-impl.h"
     48 #include "longlong.h"
     49 
     50 
     51 #define digit_value_tab __gmp_digit_value_tab
     52 
     53 /* Compute base^exp and return the most significant prec limbs in rp[].
     54    Put the count of omitted low limbs in *ign.
     55    Return the actual size (which might be less than prec).  */
     56 static mp_size_t
     57 mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
     58 		    mp_limb_t base, mp_exp_t exp,
     59 		    mp_size_t prec, mp_ptr tp)
     60 {
     61   mp_size_t ign;		/* counts number of ignored low limbs in r */
     62   mp_size_t off;		/* keeps track of offset where value starts */
     63   mp_ptr passed_rp = rp;
     64   mp_size_t rn;
     65   int cnt;
     66   int i;
     67 
     68   rp[0] = base;
     69   rn = 1;
     70   off = 0;
     71   ign = 0;
     72   count_leading_zeros (cnt, exp);
     73   for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
     74     {
     75       mpn_sqr (tp, rp + off, rn);
     76       rn = 2 * rn;
     77       rn -= tp[rn - 1] == 0;
     78       ign <<= 1;
     79 
     80       off = 0;
     81       if (rn > prec)
     82 	{
     83 	  ign += rn - prec;
     84 	  off = rn - prec;
     85 	  rn = prec;
     86 	}
     87       MP_PTR_SWAP (rp, tp);
     88 
     89       if (((exp >> i) & 1) != 0)
     90 	{
     91 	  mp_limb_t cy;
     92 	  cy = mpn_mul_1 (rp, rp + off, rn, base);
     93 	  rp[rn] = cy;
     94 	  rn += cy != 0;
     95 	  off = 0;
     96 	}
     97     }
     98 
     99   if (rn > prec)
    100     {
    101       ign += rn - prec;
    102       rp += rn - prec;
    103       rn = prec;
    104     }
    105 
    106   MPN_COPY_INCR (passed_rp, rp + off, rn);
    107   *ignp = ign;
    108   return rn;
    109 }
    110 
    111 int
    112 mpf_set_str (mpf_ptr x, const char *str, int base)
    113 {
    114   size_t str_size;
    115   char *s, *begs;
    116   size_t i, j;
    117   int c;
    118   int negative;
    119   char *dotpos = 0;
    120   const char *expptr;
    121   int exp_base;
    122   const char  *point = GMP_DECIMAL_POINT;
    123   size_t      pointlen = strlen (point);
    124   const unsigned char *digit_value;
    125   TMP_DECL;
    126 
    127   c = (unsigned char) *str;
    128 
    129   /* Skip whitespace.  */
    130   while (isspace (c))
    131     c = (unsigned char) *++str;
    132 
    133   negative = 0;
    134   if (c == '-')
    135     {
    136       negative = 1;
    137       c = (unsigned char) *++str;
    138     }
    139 
    140   /* Default base to decimal.  */
    141   if (base == 0)
    142     base = 10;
    143 
    144   exp_base = base;
    145 
    146   if (base < 0)
    147     {
    148       exp_base = 10;
    149       base = -base;
    150     }
    151 
    152   digit_value = digit_value_tab;
    153   if (base > 36)
    154     {
    155       /* For bases > 36, use the collating sequence
    156 	 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.  */
    157       digit_value += 224;
    158       if (base > 62)
    159 	return -1;		/* too large base */
    160     }
    161 
    162   /* Require at least one digit, possibly after an initial decimal point.  */
    163   if (digit_value[c] >= (base == 0 ? 10 : base))
    164     {
    165       /* not a digit, must be a decimal point */
    166       for (i = 0; i < pointlen; i++)
    167 	if (str[i] != point[i])
    168 	  return -1;
    169       if (digit_value[(unsigned char) str[pointlen]] >= (base == 0 ? 10 : base))
    170 	return -1;
    171     }
    172 
    173   /* Locate exponent part of the input.  Look from the right of the string,
    174      since the exponent is usually a lot shorter than the mantissa.  */
    175   expptr = NULL;
    176   str_size = strlen (str);
    177   for (i = str_size - 1; i > 0; i--)
    178     {
    179       c = (unsigned char) str[i];
    180       if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
    181 	{
    182 	  expptr = str + i + 1;
    183 	  str_size = i;
    184 	  break;
    185 	}
    186     }
    187 
    188   TMP_MARK;
    189   s = begs = (char *) TMP_ALLOC (str_size + 1);
    190 
    191   /* Loop through mantissa, converting it from ASCII to raw byte values.  */
    192   for (i = 0; i < str_size; i++)
    193     {
    194       c = (unsigned char) *str;
    195       if (!isspace (c))
    196 	{
    197 	  int dig;
    198 
    199 	  for (j = 0; j < pointlen; j++)
    200 	    if (str[j] != point[j])
    201 	      goto not_point;
    202 	  if (1)
    203 	    {
    204 	      if (dotpos != 0)
    205 		{
    206 		  /* already saw a decimal point, another is invalid */
    207 		  TMP_FREE;
    208 		  return -1;
    209 		}
    210 	      dotpos = s;
    211 	      str += pointlen - 1;
    212 	      i += pointlen - 1;
    213 	    }
    214 	  else
    215 	    {
    216 	    not_point:
    217 	      dig = digit_value[c];
    218 	      if (dig >= base)
    219 		{
    220 		  TMP_FREE;
    221 		  return -1;
    222 		}
    223 	      *s++ = dig;
    224 	    }
    225 	}
    226       c = (unsigned char) *++str;
    227     }
    228 
    229   str_size = s - begs;
    230 
    231   {
    232     long exp_in_base;
    233     mp_size_t ra, ma, rn, mn;
    234     int cnt;
    235     mp_ptr mp, tp, rp;
    236     mp_exp_t exp_in_limbs;
    237     mp_size_t prec = PREC(x) + 1;
    238     int divflag;
    239     mp_size_t madj, radj;
    240 
    241 #if 0
    242     size_t n_chars_needed;
    243 
    244     /* This breaks things like 0.000...0001.  To safely ignore superfluous
    245        digits, we need to skip over leading zeros.  */
    246     /* Just consider the relevant leading digits of the mantissa.  */
    247     LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
    248     if (str_size > n_chars_needed)
    249       str_size = n_chars_needed;
    250 #endif
    251 
    252     LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
    253     mp = TMP_ALLOC_LIMBS (ma);
    254     mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
    255 
    256     if (mn == 0)
    257       {
    258 	SIZ(x) = 0;
    259 	EXP(x) = 0;
    260 	TMP_FREE;
    261 	return 0;
    262       }
    263 
    264     madj = 0;
    265     /* Ignore excess limbs in MP,MSIZE.  */
    266     if (mn > prec)
    267       {
    268 	madj = mn - prec;
    269 	mp += mn - prec;
    270 	mn = prec;
    271       }
    272 
    273     if (expptr != 0)
    274       {
    275 	/* Scan and convert the exponent, in base exp_base.  */
    276 	long dig, minus, plusminus;
    277 	c = (unsigned char) *expptr;
    278 	minus = -(long) (c == '-');
    279 	plusminus = minus | -(long) (c == '+');
    280 	expptr -= plusminus;			/* conditional increment */
    281 	c = (unsigned char) *expptr++;
    282 	dig = digit_value[c];
    283 	if (dig >= exp_base)
    284 	  {
    285 	    TMP_FREE;
    286 	    return -1;
    287 	  }
    288 	exp_in_base = dig;
    289 	c = (unsigned char) *expptr++;
    290 	dig = digit_value[c];
    291 	while (dig < exp_base)
    292 	  {
    293 	    exp_in_base = exp_in_base * exp_base;
    294 	    exp_in_base += dig;
    295 	    c = (unsigned char) *expptr++;
    296 	    dig = digit_value[c];
    297 	  }
    298 	exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
    299       }
    300     else
    301       exp_in_base = 0;
    302     if (dotpos != 0)
    303       exp_in_base -= s - dotpos;
    304     divflag = exp_in_base < 0;
    305     exp_in_base = ABS (exp_in_base);
    306 
    307     if (exp_in_base == 0)
    308       {
    309 	MPN_COPY (PTR(x), mp, mn);
    310 	SIZ(x) = negative ? -mn : mn;
    311 	EXP(x) = mn + madj;
    312 	TMP_FREE;
    313 	return 0;
    314       }
    315 
    316     ra = 2 * (prec + 1);
    317     rp = TMP_ALLOC_LIMBS (ra);
    318     tp = TMP_ALLOC_LIMBS (ra);
    319     rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
    320 
    321     if (divflag)
    322       {
    323 #if 0
    324 	/* FIXME: Should use mpn_div_q here.  */
    325 	...
    326 	mpn_div_q (tp, mp, mn, rp, rn, scratch);
    327 	...
    328 #else
    329 	mp_ptr qp;
    330 	mp_limb_t qlimb;
    331 	if (mn < rn)
    332 	  {
    333 	    /* Pad out MP,MSIZE for current divrem semantics.  */
    334 	    mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
    335 	    MPN_ZERO (tmp, rn - mn);
    336 	    MPN_COPY (tmp + rn - mn, mp, mn);
    337 	    mp = tmp;
    338 	    madj -= rn - mn;
    339 	    mn = rn;
    340 	  }
    341 	if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
    342 	  {
    343 	    mp_limb_t cy;
    344 	    count_leading_zeros (cnt, rp[rn - 1]);
    345 	    cnt -= GMP_NAIL_BITS;
    346 	    mpn_lshift (rp, rp, rn, cnt);
    347 	    cy = mpn_lshift (mp, mp, mn, cnt);
    348 	    if (cy)
    349 	      mp[mn++] = cy;
    350 	  }
    351 
    352 	qp = TMP_ALLOC_LIMBS (prec + 1);
    353 	qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
    354 	tp = qp;
    355 	exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
    356 	rn = prec;
    357 	if (qlimb != 0)
    358 	  {
    359 	    tp[prec] = qlimb;
    360 	    /* Skip the least significant limb not to overrun the destination
    361 	       variable.  */
    362 	    tp++;
    363 	  }
    364 #endif
    365       }
    366     else
    367       {
    368 	tp = TMP_ALLOC_LIMBS (rn + mn);
    369 	if (rn > mn)
    370 	  mpn_mul (tp, rp, rn, mp, mn);
    371 	else
    372 	  mpn_mul (tp, mp, mn, rp, rn);
    373 	rn += mn;
    374 	rn -= tp[rn - 1] == 0;
    375 	exp_in_limbs = rn + madj + radj;
    376 
    377 	if (rn > prec)
    378 	  {
    379 	    tp += rn - prec;
    380 	    rn = prec;
    381 	    exp_in_limbs += 0;
    382 	  }
    383       }
    384 
    385     MPN_COPY (PTR(x), tp, rn);
    386     SIZ(x) = negative ? -rn : rn;
    387     EXP(x) = exp_in_limbs;
    388     TMP_FREE;
    389     return 0;
    390   }
    391 }
    392