1 1.1 mrg /* __gmp_doscan -- formatted input internals. 2 1.1 mrg 3 1.1 mrg THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST 4 1.1 mrg CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN 5 1.1 mrg FUTURE GNU MP RELEASES. 6 1.1 mrg 7 1.1.1.2 mrg Copyright 2001-2003 Free Software Foundation, Inc. 8 1.1 mrg 9 1.1 mrg This file is part of the GNU MP Library. 10 1.1 mrg 11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 12 1.1.1.2 mrg it under the terms of either: 13 1.1.1.2 mrg 14 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free 15 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your 16 1.1.1.2 mrg option) any later version. 17 1.1.1.2 mrg 18 1.1.1.2 mrg or 19 1.1.1.2 mrg 20 1.1.1.2 mrg * the GNU General Public License as published by the Free Software 21 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any 22 1.1.1.2 mrg later version. 23 1.1.1.2 mrg 24 1.1.1.2 mrg or both in parallel, as here. 25 1.1 mrg 26 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 27 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 28 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 29 1.1.1.2 mrg for more details. 30 1.1 mrg 31 1.1.1.2 mrg You should have received copies of the GNU General Public License and the 32 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 33 1.1.1.2 mrg see https://www.gnu.org/licenses/. */ 34 1.1 mrg 35 1.1 mrg #define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */ 36 1.1 mrg 37 1.1.1.2 mrg #include "config.h" /* needed for the HAVE_, could also move gmp incls */ 38 1.1 mrg 39 1.1 mrg #include <stdarg.h> 40 1.1 mrg #include <ctype.h> 41 1.1 mrg #include <stddef.h> /* for ptrdiff_t */ 42 1.1 mrg #include <stdio.h> 43 1.1 mrg #include <stdlib.h> /* for strtol */ 44 1.1 mrg #include <string.h> 45 1.1 mrg 46 1.1 mrg #if HAVE_LANGINFO_H 47 1.1 mrg #include <langinfo.h> /* for nl_langinfo */ 48 1.1 mrg #endif 49 1.1 mrg 50 1.1 mrg #if HAVE_LOCALE_H 51 1.1 mrg #include <locale.h> /* for localeconv */ 52 1.1 mrg #endif 53 1.1 mrg 54 1.1 mrg #if HAVE_INTTYPES_H 55 1.1 mrg # include <inttypes.h> /* for intmax_t */ 56 1.1 mrg #else 57 1.1 mrg # if HAVE_STDINT_H 58 1.1 mrg # include <stdint.h> 59 1.1 mrg # endif 60 1.1 mrg #endif 61 1.1 mrg 62 1.1 mrg #if HAVE_SYS_TYPES_H 63 1.1 mrg #include <sys/types.h> /* for quad_t */ 64 1.1 mrg #endif 65 1.1 mrg 66 1.1 mrg #include "gmp-impl.h" 67 1.1 mrg 68 1.1 mrg 69 1.1 mrg /* Change this to "#define TRACE(x) x" for some traces. */ 70 1.1 mrg #define TRACE(x) 71 1.1 mrg 72 1.1 mrg 73 1.1 mrg /* General: 74 1.1 mrg 75 1.1 mrg It's necessary to parse up the format string to recognise the GMP 76 1.1 mrg extra types F, Q and Z. Other types and conversions are passed 77 1.1 mrg across to the standard sscanf or fscanf via funs->scan, for ease of 78 1.1 mrg implementation. This is essential in the case of something like glibc 79 1.1 mrg %p where the pointer format isn't actually documented. 80 1.1 mrg 81 1.1 mrg Because funs->scan doesn't get the whole input it can't put the right 82 1.1 mrg values in for %n, so that's handled in __gmp_doscan. Neither sscanf 83 1.1 mrg nor fscanf directly indicate how many characters were read, so an 84 1.1 mrg extra %n is appended to each run for that. For fscanf this merely 85 1.1 mrg supports our %n output, but for sscanf it lets funs->step move us 86 1.1 mrg along the input string. 87 1.1 mrg 88 1.1 mrg Whitespace and literal matches in the format string, including %%, 89 1.1 mrg are handled directly within __gmp_doscan. This is reasonably 90 1.1 mrg efficient, and avoids some suspicious behaviour observed in various 91 1.1 mrg system libc's. GLIBC 2.2.4 for instance returns 0 on 92 1.1 mrg 93 1.1 mrg sscanf(" ", " x") 94 1.1 mrg or 95 1.1 mrg sscanf(" ", " x%d",&n) 96 1.1 mrg 97 1.1 mrg whereas we think they should return EOF, since end-of-string is 98 1.1 mrg reached when a match of "x" is required. 99 1.1 mrg 100 1.1 mrg For standard % conversions, funs->scan is called once for each 101 1.1 mrg conversion. If we had vfscanf and vsscanf and could rely on their 102 1.1 mrg fixed text matching behaviour then we could call them with multiple 103 1.1 mrg consecutive standard conversions. But plain fscanf and sscanf work 104 1.1 mrg fine, and parsing one field at a time shouldn't be too much of a 105 1.1 mrg slowdown. 106 1.1 mrg 107 1.1 mrg gmpscan: 108 1.1 mrg 109 1.1 mrg gmpscan reads a gmp type. It's only used from one place, but is a 110 1.1 mrg separate subroutine to avoid a big chunk of complicated code in the 111 1.1 mrg middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it 112 1.1 mrg possible to share code for parsing integers, rationals and floats. 113 1.1 mrg 114 1.1 mrg In gmpscan normally one char of lookahead is maintained, but when width 115 1.1 mrg is reached that stops, on the principle that an fgetc/ungetc of a char 116 1.1 mrg past where we're told to stop would be undesirable. "chars" is how many 117 1.1 mrg characters have been read so far, including the current c. When 118 1.1 mrg chars==width and another character is desired then a jump is done to the 119 1.1 mrg "convert" stage. c is invalid and mustn't be unget'ed in this case; 120 1.1 mrg chars is set to width+1 to indicate that. 121 1.1 mrg 122 1.1 mrg gmpscan normally returns the number of characters read. -1 means an 123 1.1 mrg invalid field, -2 means EOF reached before any matching characters 124 1.1 mrg were read. 125 1.1 mrg 126 1.1 mrg For hex floats, the mantissa part is passed to mpf_set_str, then the 127 1.1 mrg exponent is applied with mpf_mul_exp or mpf_div_2exp. This is easier 128 1.1 mrg than teaching mpf_set_str about an exponent factor (ie. 2) differing 129 1.1 mrg from the mantissa radix point factor (ie. 16). mpf_mul_exp and 130 1.1 mrg mpf_div_2exp will preserve the application requested precision, so 131 1.1 mrg nothing in that respect is lost by making this a two-step process. 132 1.1 mrg 133 1.1 mrg Matching and errors: 134 1.1 mrg 135 1.1 mrg C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest 136 1.1 mrg string which is a match for the appropriate type, or a prefix of a 137 1.1 mrg match. With that done, if it's only a prefix then the result is a 138 1.1 mrg matching failure, ie. invalid input. 139 1.1 mrg 140 1.1 mrg This rule seems fairly clear, but doesn't seem to be universally 141 1.1 mrg applied in system C libraries. Even GLIBC doesn't seem to get it 142 1.1 mrg right, insofar as it seems to accept some apparently invalid forms. 143 1.1 mrg Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the 144 1.1 mrg standard would suggest a non-empty sequence of digits should be 145 1.1 mrg required after an "0x". 146 1.1 mrg 147 1.1 mrg A footnote to 7.19.6.2 para 17 notes how this input item reading can 148 1.1 mrg mean inputs acceptable to strtol are not acceptable to fscanf. We 149 1.1 mrg think this confirms our reading of "0x" as invalid. 150 1.1 mrg 151 1.1 mrg Clearly gmp_sscanf could backtrack to a longest input which was a 152 1.1 mrg valid match for a given item, but this is not done, since C99 says 153 1.1 mrg sscanf is identical to fscanf, so we make gmp_sscanf identical to 154 1.1 mrg gmp_fscanf. 155 1.1 mrg 156 1.1 mrg Types: 157 1.1 mrg 158 1.1 mrg C99 says "ll" is for long long, and "L" is for long double floats. 159 1.1 mrg Unfortunately in GMP 4.1.1 we documented the two as equivalent. This 160 1.1 mrg doesn't affect us directly, since both are passed through to plain 161 1.1 mrg scanf. It seems wisest not to try to enforce the C99 rule. This is 162 1.1 mrg consistent with what we said before, though whether it actually 163 1.1 mrg worked was always up to the C library. 164 1.1 mrg 165 1.1 mrg Alternatives: 166 1.1 mrg 167 1.1 mrg Consideration was given to using separate code for gmp_fscanf and 168 1.1 mrg gmp_sscanf. The sscanf case could zip across a string doing literal 169 1.1 mrg matches or recognising digits in gmpscan, rather than making a 170 1.1 mrg function call fun->get per character. The fscanf could use getc 171 1.1 mrg rather than fgetc too, which might help those systems where getc is a 172 1.1 mrg macro or otherwise inlined. But none of this scanning and converting 173 1.1 mrg will be particularly fast, so the two are done together to keep it a 174 1.1 mrg little simpler for now. 175 1.1 mrg 176 1.1 mrg Various multibyte string issues are not addressed, for a start C99 177 1.1 mrg scanf says the format string is multibyte. Since we pass %c, %s and 178 1.1 mrg %[ to the system scanf, they might do multibyte reads already, but 179 1.1 mrg it's another matter whether or not that can be used, since our digit 180 1.1 mrg and whitespace parsing is only unibyte. The plan is to quietly 181 1.1 mrg ignore multibyte locales for now. This is not as bad as it sounds, 182 1.1 mrg since GMP is presumably used mostly on numbers, which can be 183 1.1 mrg perfectly adequately treated in plain ASCII. 184 1.1 mrg 185 1.1 mrg */ 186 1.1 mrg 187 1.1 mrg 188 1.1 mrg struct gmp_doscan_params_t { 189 1.1 mrg int base; 190 1.1 mrg int ignore; 191 1.1 mrg char type; 192 1.1 mrg int width; 193 1.1 mrg }; 194 1.1 mrg 195 1.1 mrg 196 1.1 mrg #define GET(c) \ 197 1.1 mrg do { \ 198 1.1 mrg ASSERT (chars <= width); \ 199 1.1 mrg chars++; \ 200 1.1 mrg if (chars > width) \ 201 1.1 mrg goto convert; \ 202 1.1 mrg (c) = (*funs->get) (data); \ 203 1.1 mrg } while (0) 204 1.1 mrg 205 1.1 mrg /* store into "s", extending if necessary */ 206 1.1 mrg #define STORE(c) \ 207 1.1 mrg do { \ 208 1.1 mrg ASSERT (s_upto <= s_alloc); \ 209 1.1 mrg if (s_upto >= s_alloc) \ 210 1.1 mrg { \ 211 1.1 mrg size_t s_alloc_new = s_alloc + S_ALLOC_STEP; \ 212 1.1 mrg s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \ 213 1.1 mrg s_alloc = s_alloc_new; \ 214 1.1 mrg } \ 215 1.1 mrg s[s_upto++] = c; \ 216 1.1 mrg } while (0) 217 1.1 mrg 218 1.1 mrg #define S_ALLOC_STEP 512 219 1.1 mrg 220 1.1 mrg static int 221 1.1 mrg gmpscan (const struct gmp_doscan_funs_t *funs, void *data, 222 1.1 mrg const struct gmp_doscan_params_t *p, void *dst) 223 1.1 mrg { 224 1.1 mrg int chars, c, base, first, width, seen_point, seen_digit, hexfloat; 225 1.1 mrg size_t s_upto, s_alloc, hexexp; 226 1.1 mrg char *s; 227 1.1 mrg int invalid = 0; 228 1.1 mrg 229 1.1 mrg TRACE (printf ("gmpscan\n")); 230 1.1 mrg 231 1.1 mrg ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z'); 232 1.1 mrg 233 1.1 mrg c = (*funs->get) (data); 234 1.1 mrg if (c == EOF) 235 1.1 mrg return -2; 236 1.1 mrg 237 1.1 mrg chars = 1; 238 1.1 mrg first = 1; 239 1.1 mrg seen_point = 0; 240 1.1 mrg width = (p->width == 0 ? INT_MAX-1 : p->width); 241 1.1 mrg base = p->base; 242 1.1 mrg s_alloc = S_ALLOC_STEP; 243 1.1 mrg s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char); 244 1.1 mrg s_upto = 0; 245 1.1 mrg hexfloat = 0; 246 1.1 mrg hexexp = 0; 247 1.1 mrg 248 1.1 mrg another: 249 1.1 mrg seen_digit = 0; 250 1.1 mrg if (c == '-') 251 1.1 mrg { 252 1.1 mrg STORE (c); 253 1.1 mrg goto get_for_sign; 254 1.1 mrg } 255 1.1 mrg else if (c == '+') 256 1.1 mrg { 257 1.1 mrg /* don't store '+', it's not accepted by mpz_set_str etc */ 258 1.1 mrg get_for_sign: 259 1.1 mrg GET (c); 260 1.1 mrg } 261 1.1 mrg 262 1.1 mrg if (base == 0) 263 1.1 mrg { 264 1.1 mrg base = 10; /* decimal if no base indicator */ 265 1.1 mrg if (c == '0') 266 1.1 mrg { 267 1.1 mrg seen_digit = 1; /* 0 alone is a valid number */ 268 1.1 mrg if (p->type != 'F') 269 1.1 mrg base = 8; /* leading 0 is octal, for non-floats */ 270 1.1 mrg STORE (c); 271 1.1 mrg GET (c); 272 1.1 mrg if (c == 'x' || c == 'X') 273 1.1 mrg { 274 1.1 mrg base = 16; 275 1.1 mrg seen_digit = 0; /* must have digits after an 0x */ 276 1.1 mrg if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */ 277 1.1 mrg hexfloat = 1; 278 1.1 mrg else 279 1.1 mrg STORE (c); 280 1.1 mrg GET (c); 281 1.1 mrg } 282 1.1 mrg } 283 1.1 mrg } 284 1.1 mrg 285 1.1 mrg digits: 286 1.1 mrg for (;;) 287 1.1 mrg { 288 1.1 mrg if (base == 16) 289 1.1 mrg { 290 1.1 mrg if (! isxdigit (c)) 291 1.1 mrg break; 292 1.1 mrg } 293 1.1 mrg else 294 1.1 mrg { 295 1.1 mrg if (! isdigit (c)) 296 1.1 mrg break; 297 1.1 mrg if (base == 8 && (c == '8' || c == '9')) 298 1.1 mrg break; 299 1.1 mrg } 300 1.1 mrg 301 1.1 mrg seen_digit = 1; 302 1.1 mrg STORE (c); 303 1.1 mrg GET (c); 304 1.1 mrg } 305 1.1 mrg 306 1.1 mrg if (first) 307 1.1 mrg { 308 1.1 mrg /* decimal point */ 309 1.1 mrg if (p->type == 'F' && ! seen_point) 310 1.1 mrg { 311 1.1 mrg /* For a multi-character decimal point, if the first character is 312 1.1 mrg present then all of it must be, otherwise the input is 313 1.1 mrg considered invalid. */ 314 1.1 mrg const char *point = GMP_DECIMAL_POINT; 315 1.1 mrg int pc = (unsigned char) *point++; 316 1.1 mrg if (c == pc) 317 1.1 mrg { 318 1.1 mrg for (;;) 319 1.1 mrg { 320 1.1 mrg STORE (c); 321 1.1 mrg GET (c); 322 1.1 mrg pc = (unsigned char) *point++; 323 1.1 mrg if (pc == '\0') 324 1.1 mrg break; 325 1.1 mrg if (c != pc) 326 1.1 mrg goto set_invalid; 327 1.1 mrg } 328 1.1 mrg seen_point = 1; 329 1.1 mrg goto digits; 330 1.1 mrg } 331 1.1 mrg } 332 1.1 mrg 333 1.1 mrg /* exponent */ 334 1.1 mrg if (p->type == 'F') 335 1.1 mrg { 336 1.1 mrg if (hexfloat && (c == 'p' || c == 'P')) 337 1.1 mrg { 338 1.1 mrg hexexp = s_upto; /* exponent location */ 339 1.1 mrg base = 10; /* exponent in decimal */ 340 1.1 mrg goto exponent; 341 1.1 mrg } 342 1.1 mrg else if (! hexfloat && (c == 'e' || c == 'E')) 343 1.1 mrg { 344 1.1 mrg exponent: 345 1.1 mrg /* must have at least one digit in the mantissa, just an exponent 346 1.1 mrg is not good enough */ 347 1.1 mrg if (! seen_digit) 348 1.1 mrg goto set_invalid; 349 1.1 mrg 350 1.1 mrg do_second: 351 1.1 mrg first = 0; 352 1.1 mrg STORE (c); 353 1.1 mrg GET (c); 354 1.1 mrg goto another; 355 1.1 mrg } 356 1.1 mrg } 357 1.1 mrg 358 1.1 mrg /* denominator */ 359 1.1 mrg if (p->type == 'Q' && c == '/') 360 1.1 mrg { 361 1.1 mrg /* must have at least one digit in the numerator */ 362 1.1 mrg if (! seen_digit) 363 1.1 mrg goto set_invalid; 364 1.1 mrg 365 1.1 mrg /* now look for at least one digit in the denominator */ 366 1.1 mrg seen_digit = 0; 367 1.1 mrg 368 1.1 mrg /* allow the base to be redetermined for "%i" */ 369 1.1 mrg base = p->base; 370 1.1 mrg goto do_second; 371 1.1 mrg } 372 1.1 mrg } 373 1.1 mrg 374 1.1 mrg convert: 375 1.1 mrg if (! seen_digit) 376 1.1 mrg { 377 1.1 mrg set_invalid: 378 1.1 mrg invalid = 1; 379 1.1 mrg goto done; 380 1.1 mrg } 381 1.1 mrg 382 1.1 mrg if (! p->ignore) 383 1.1 mrg { 384 1.1 mrg STORE ('\0'); 385 1.1 mrg TRACE (printf (" convert \"%s\"\n", s)); 386 1.1 mrg 387 1.1 mrg /* We ought to have parsed out a valid string above, so just test 388 1.1 mrg mpz_set_str etc with an ASSERT. */ 389 1.1 mrg switch (p->type) { 390 1.1 mrg case 'F': 391 1.1 mrg { 392 1.1 mrg mpf_ptr f = (mpf_ptr) dst; 393 1.1 mrg if (hexexp != 0) 394 1.1 mrg s[hexexp] = '\0'; 395 1.1 mrg ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10)); 396 1.1 mrg if (hexexp != 0) 397 1.1 mrg { 398 1.1 mrg char *dummy; 399 1.1 mrg long exp; 400 1.1 mrg exp = strtol (s + hexexp + 1, &dummy, 10); 401 1.1 mrg if (exp >= 0) 402 1.1 mrg mpf_mul_2exp (f, f, (unsigned long) exp); 403 1.1 mrg else 404 1.1.1.3 mrg mpf_div_2exp (f, f, NEG_CAST (unsigned long, exp)); 405 1.1 mrg } 406 1.1 mrg } 407 1.1 mrg break; 408 1.1 mrg case 'Q': 409 1.1 mrg ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base)); 410 1.1 mrg break; 411 1.1 mrg case 'Z': 412 1.1 mrg ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base)); 413 1.1 mrg break; 414 1.1 mrg default: 415 1.1 mrg ASSERT (0); 416 1.1 mrg /*FALLTHRU*/ 417 1.1 mrg break; 418 1.1 mrg } 419 1.1 mrg } 420 1.1 mrg 421 1.1 mrg done: 422 1.1 mrg ASSERT (chars <= width+1); 423 1.1 mrg if (chars != width+1) 424 1.1 mrg { 425 1.1 mrg (*funs->unget) (c, data); 426 1.1 mrg TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1)); 427 1.1 mrg } 428 1.1 mrg chars--; 429 1.1 mrg 430 1.1 mrg (*__gmp_free_func) (s, s_alloc); 431 1.1 mrg 432 1.1 mrg if (invalid) 433 1.1 mrg { 434 1.1 mrg TRACE (printf (" invalid\n")); 435 1.1 mrg return -1; 436 1.1 mrg } 437 1.1 mrg 438 1.1 mrg TRACE (printf (" return %d chars (cf width %d)\n", chars, width)); 439 1.1 mrg return chars; 440 1.1 mrg } 441 1.1 mrg 442 1.1 mrg 443 1.1 mrg /* Read and discard whitespace, if any. Return number of chars skipped. 444 1.1 mrg Whitespace skipping never provokes the EOF return from __gmp_doscan, so 445 1.1 mrg it's not necessary to watch for EOF from funs->get, */ 446 1.1 mrg static int 447 1.1 mrg skip_white (const struct gmp_doscan_funs_t *funs, void *data) 448 1.1 mrg { 449 1.1 mrg int c; 450 1.1 mrg int ret = 0; 451 1.1 mrg 452 1.1 mrg do 453 1.1 mrg { 454 1.1 mrg c = (funs->get) (data); 455 1.1 mrg ret++; 456 1.1 mrg } 457 1.1 mrg while (isspace (c)); 458 1.1 mrg 459 1.1 mrg (funs->unget) (c, data); 460 1.1 mrg ret--; 461 1.1 mrg 462 1.1 mrg TRACE (printf (" skip white %d\n", ret)); 463 1.1 mrg return ret; 464 1.1 mrg } 465 1.1 mrg 466 1.1 mrg 467 1.1 mrg int 468 1.1 mrg __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data, 469 1.1 mrg const char *orig_fmt, va_list orig_ap) 470 1.1 mrg { 471 1.1 mrg struct gmp_doscan_params_t param; 472 1.1 mrg va_list ap; 473 1.1 mrg char *alloc_fmt; 474 1.1 mrg const char *fmt, *this_fmt, *end_fmt; 475 1.1 mrg size_t orig_fmt_len, alloc_fmt_size, len; 476 1.1 mrg int new_fields, new_chars; 477 1.1 mrg char fchar; 478 1.1 mrg int fields = 0; 479 1.1 mrg int chars = 0; 480 1.1 mrg 481 1.1 mrg TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt); 482 1.1 mrg if (funs->scan == (gmp_doscan_scan_t) sscanf) 483 1.1 mrg printf (" s=\"%s\"\n", * (const char **) data)); 484 1.1 mrg 485 1.1 mrg /* Don't modify orig_ap, if va_list is actually an array and hence call by 486 1.1 mrg reference. It could be argued that it'd be more efficient to leave 487 1.1 mrg callers to make a copy if they care, but doing so here is going to be a 488 1.1 mrg very small part of the total work, and we may as well keep applications 489 1.1 mrg out of trouble. */ 490 1.1 mrg va_copy (ap, orig_ap); 491 1.1 mrg 492 1.1 mrg /* Parts of the format string are going to be copied so that a " %n" can 493 1.1 mrg be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be 494 1.1 mrg needed if fmt consists of a single "%" specifier, but otherwise is an 495 1.1 mrg overestimate. We're not going to be very fast here, so use 496 1.1 mrg __gmp_allocate_func rather than TMP_ALLOC. */ 497 1.1 mrg orig_fmt_len = strlen (orig_fmt); 498 1.1 mrg alloc_fmt_size = orig_fmt_len + 4; 499 1.1 mrg alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char); 500 1.1 mrg 501 1.1 mrg fmt = orig_fmt; 502 1.1 mrg end_fmt = orig_fmt + orig_fmt_len; 503 1.1 mrg 504 1.1 mrg for (;;) 505 1.1 mrg { 506 1.1 mrg next: 507 1.1 mrg fchar = *fmt++; 508 1.1 mrg 509 1.1 mrg if (fchar == '\0') 510 1.1 mrg break; 511 1.1 mrg 512 1.1 mrg if (isspace (fchar)) 513 1.1 mrg { 514 1.1 mrg chars += skip_white (funs, data); 515 1.1 mrg continue; 516 1.1 mrg } 517 1.1 mrg 518 1.1 mrg if (fchar != '%') 519 1.1 mrg { 520 1.1 mrg int c; 521 1.1 mrg literal: 522 1.1 mrg c = (funs->get) (data); 523 1.1 mrg if (c != fchar) 524 1.1 mrg { 525 1.1 mrg (funs->unget) (c, data); 526 1.1 mrg if (c == EOF) 527 1.1 mrg { 528 1.1 mrg eof_no_match: 529 1.1 mrg if (fields == 0) 530 1.1 mrg fields = EOF; 531 1.1 mrg } 532 1.1 mrg goto done; 533 1.1 mrg } 534 1.1 mrg chars++; 535 1.1 mrg continue; 536 1.1 mrg } 537 1.1 mrg 538 1.1 mrg param.type = '\0'; 539 1.1 mrg param.base = 0; /* for e,f,g,i */ 540 1.1 mrg param.ignore = 0; 541 1.1 mrg param.width = 0; 542 1.1 mrg 543 1.1 mrg this_fmt = fmt-1; 544 1.1 mrg TRACE (printf (" this_fmt \"%s\"\n", this_fmt)); 545 1.1 mrg 546 1.1 mrg for (;;) 547 1.1 mrg { 548 1.1 mrg ASSERT (fmt <= end_fmt); 549 1.1 mrg 550 1.1 mrg fchar = *fmt++; 551 1.1 mrg switch (fchar) { 552 1.1 mrg 553 1.1 mrg case '\0': /* unterminated % sequence */ 554 1.1 mrg ASSERT (0); 555 1.1 mrg goto done; 556 1.1 mrg 557 1.1 mrg case '%': /* literal % */ 558 1.1 mrg goto literal; 559 1.1 mrg 560 1.1 mrg case '[': /* character range */ 561 1.1 mrg fchar = *fmt++; 562 1.1 mrg if (fchar == '^') 563 1.1 mrg fchar = *fmt++; 564 1.1 mrg /* ']' allowed as the first char (possibly after '^') */ 565 1.1 mrg if (fchar == ']') 566 1.1 mrg fchar = *fmt++; 567 1.1 mrg for (;;) 568 1.1 mrg { 569 1.1 mrg ASSERT (fmt <= end_fmt); 570 1.1 mrg if (fchar == '\0') 571 1.1 mrg { 572 1.1 mrg /* unterminated % sequence */ 573 1.1 mrg ASSERT (0); 574 1.1 mrg goto done; 575 1.1 mrg } 576 1.1 mrg if (fchar == ']') 577 1.1 mrg break; 578 1.1 mrg fchar = *fmt++; 579 1.1 mrg } 580 1.1 mrg /*FALLTHRU*/ 581 1.1 mrg case 'c': /* characters */ 582 1.1 mrg case 's': /* string of non-whitespace */ 583 1.1 mrg case 'p': /* pointer */ 584 1.1 mrg libc_type: 585 1.1 mrg len = fmt - this_fmt; 586 1.1 mrg memcpy (alloc_fmt, this_fmt, len); 587 1.1 mrg alloc_fmt[len++] = '%'; 588 1.1 mrg alloc_fmt[len++] = 'n'; 589 1.1 mrg alloc_fmt[len] = '\0'; 590 1.1 mrg 591 1.1 mrg TRACE (printf (" scan \"%s\"\n", alloc_fmt); 592 1.1 mrg if (funs->scan == (gmp_doscan_scan_t) sscanf) 593 1.1 mrg printf (" s=\"%s\"\n", * (const char **) data)); 594 1.1 mrg 595 1.1 mrg new_chars = -1; 596 1.1 mrg if (param.ignore) 597 1.1 mrg { 598 1.1 mrg new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL); 599 1.1 mrg ASSERT (new_fields == 0 || new_fields == EOF); 600 1.1 mrg } 601 1.1 mrg else 602 1.1 mrg { 603 1.1 mrg void *arg = va_arg (ap, void *); 604 1.1 mrg new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars); 605 1.1 mrg ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF); 606 1.1 mrg 607 1.1 mrg if (new_fields == 0) 608 1.1 mrg goto done; /* invalid input */ 609 1.1 mrg 610 1.1 mrg if (new_fields == 1) 611 1.1 mrg ASSERT (new_chars != -1); 612 1.1 mrg } 613 1.1 mrg TRACE (printf (" new_fields %d new_chars %d\n", 614 1.1 mrg new_fields, new_chars)); 615 1.1 mrg 616 1.1 mrg if (new_fields == -1) 617 1.1 mrg goto eof_no_match; /* EOF before anything matched */ 618 1.1 mrg 619 1.1 mrg /* Under param.ignore, when new_fields==0 we don't know if 620 1.1 mrg it's a successful match or an invalid field. new_chars 621 1.1 mrg won't have been assigned if it was an invalid field. */ 622 1.1 mrg if (new_chars == -1) 623 1.1 mrg goto done; /* invalid input */ 624 1.1 mrg 625 1.1 mrg chars += new_chars; 626 1.1 mrg (*funs->step) (data, new_chars); 627 1.1 mrg 628 1.1 mrg increment_fields: 629 1.1 mrg if (! param.ignore) 630 1.1 mrg fields++; 631 1.1 mrg goto next; 632 1.1 mrg 633 1.1 mrg case 'd': /* decimal */ 634 1.1 mrg case 'u': /* decimal */ 635 1.1 mrg param.base = 10; 636 1.1 mrg goto numeric; 637 1.1 mrg 638 1.1 mrg case 'e': /* float */ 639 1.1 mrg case 'E': /* float */ 640 1.1 mrg case 'f': /* float */ 641 1.1 mrg case 'g': /* float */ 642 1.1 mrg case 'G': /* float */ 643 1.1 mrg case 'i': /* integer with base marker */ 644 1.1 mrg numeric: 645 1.1 mrg if (param.type != 'F' && param.type != 'Q' && param.type != 'Z') 646 1.1 mrg goto libc_type; 647 1.1 mrg 648 1.1 mrg chars += skip_white (funs, data); 649 1.1 mrg 650 1.1 mrg new_chars = gmpscan (funs, data, ¶m, 651 1.1 mrg param.ignore ? NULL : va_arg (ap, void*)); 652 1.1 mrg if (new_chars == -2) 653 1.1 mrg goto eof_no_match; 654 1.1 mrg if (new_chars == -1) 655 1.1 mrg goto done; 656 1.1 mrg 657 1.1 mrg ASSERT (new_chars >= 0); 658 1.1 mrg chars += new_chars; 659 1.1 mrg goto increment_fields; 660 1.1 mrg 661 1.1 mrg case 'a': /* glibc allocate string */ 662 1.1 mrg case '\'': /* glibc digit groupings */ 663 1.1 mrg break; 664 1.1 mrg 665 1.1 mrg case 'F': /* mpf_t */ 666 1.1 mrg case 'j': /* intmax_t */ 667 1.1 mrg case 'L': /* long long */ 668 1.1 mrg case 'q': /* quad_t */ 669 1.1 mrg case 'Q': /* mpq_t */ 670 1.1 mrg case 't': /* ptrdiff_t */ 671 1.1 mrg case 'z': /* size_t */ 672 1.1 mrg case 'Z': /* mpz_t */ 673 1.1 mrg set_type: 674 1.1 mrg param.type = fchar; 675 1.1 mrg break; 676 1.1 mrg 677 1.1 mrg case 'h': /* short or char */ 678 1.1 mrg if (param.type != 'h') 679 1.1 mrg goto set_type; 680 1.1 mrg param.type = 'H'; /* internal code for "hh" */ 681 1.1 mrg break; 682 1.1 mrg 683 1.1 mrg goto numeric; 684 1.1 mrg 685 1.1 mrg case 'l': /* long, long long, double or long double */ 686 1.1 mrg if (param.type != 'l') 687 1.1 mrg goto set_type; 688 1.1 mrg param.type = 'L'; /* "ll" means "L" */ 689 1.1 mrg break; 690 1.1 mrg 691 1.1 mrg case 'n': 692 1.1 mrg if (! param.ignore) 693 1.1 mrg { 694 1.1 mrg void *p; 695 1.1 mrg p = va_arg (ap, void *); 696 1.1 mrg TRACE (printf (" store %%n to %p\n", p)); 697 1.1 mrg switch (param.type) { 698 1.1 mrg case '\0': * (int *) p = chars; break; 699 1.1 mrg case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break; 700 1.1 mrg case 'H': * (char *) p = chars; break; 701 1.1 mrg case 'h': * (short *) p = chars; break; 702 1.1 mrg #if HAVE_INTMAX_T 703 1.1 mrg case 'j': * (intmax_t *) p = chars; break; 704 1.1 mrg #else 705 1.1 mrg case 'j': ASSERT_FAIL (intmax_t not available); break; 706 1.1 mrg #endif 707 1.1 mrg case 'l': * (long *) p = chars; break; 708 1.1 mrg #if HAVE_QUAD_T && HAVE_LONG_LONG 709 1.1 mrg case 'q': 710 1.1 mrg ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long)); 711 1.1 mrg /*FALLTHRU*/ 712 1.1 mrg #else 713 1.1 mrg case 'q': ASSERT_FAIL (quad_t not available); break; 714 1.1 mrg #endif 715 1.1 mrg #if HAVE_LONG_LONG 716 1.1 mrg case 'L': * (long long *) p = chars; break; 717 1.1 mrg #else 718 1.1 mrg case 'L': ASSERT_FAIL (long long not available); break; 719 1.1 mrg #endif 720 1.1 mrg case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break; 721 1.1 mrg #if HAVE_PTRDIFF_T 722 1.1 mrg case 't': * (ptrdiff_t *) p = chars; break; 723 1.1 mrg #else 724 1.1 mrg case 't': ASSERT_FAIL (ptrdiff_t not available); break; 725 1.1 mrg #endif 726 1.1 mrg case 'z': * (size_t *) p = chars; break; 727 1.1 mrg case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break; 728 1.1 mrg default: ASSERT (0); break; 729 1.1 mrg } 730 1.1 mrg } 731 1.1 mrg goto next; 732 1.1 mrg 733 1.1 mrg case 'o': 734 1.1 mrg param.base = 8; 735 1.1 mrg goto numeric; 736 1.1 mrg 737 1.1 mrg case 'x': 738 1.1 mrg case 'X': 739 1.1 mrg param.base = 16; 740 1.1 mrg goto numeric; 741 1.1 mrg 742 1.1 mrg case '0': case '1': case '2': case '3': case '4': 743 1.1 mrg case '5': case '6': case '7': case '8': case '9': 744 1.1 mrg param.width = 0; 745 1.1 mrg do { 746 1.1 mrg param.width = param.width * 10 + (fchar-'0'); 747 1.1 mrg fchar = *fmt++; 748 1.1 mrg } while (isdigit (fchar)); 749 1.1 mrg fmt--; /* unget the non-digit */ 750 1.1 mrg break; 751 1.1 mrg 752 1.1 mrg case '*': 753 1.1 mrg param.ignore = 1; 754 1.1 mrg break; 755 1.1 mrg 756 1.1 mrg default: 757 1.1 mrg /* something invalid in a % sequence */ 758 1.1 mrg ASSERT (0); 759 1.1 mrg goto next; 760 1.1 mrg } 761 1.1 mrg } 762 1.1 mrg } 763 1.1 mrg 764 1.1 mrg done: 765 1.1 mrg (*__gmp_free_func) (alloc_fmt, alloc_fmt_size); 766 1.1 mrg return fields; 767 1.1 mrg } 768