doscan.c revision 1.1.1.3 1 1.1 mrg /* __gmp_doscan -- formatted input internals.
2 1.1 mrg
3 1.1 mrg THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
4 1.1 mrg CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5 1.1 mrg FUTURE GNU MP RELEASES.
6 1.1 mrg
7 1.1.1.2 mrg Copyright 2001-2003 Free Software Foundation, Inc.
8 1.1 mrg
9 1.1 mrg This file is part of the GNU MP Library.
10 1.1 mrg
11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
12 1.1.1.2 mrg it under the terms of either:
13 1.1.1.2 mrg
14 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free
15 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your
16 1.1.1.2 mrg option) any later version.
17 1.1.1.2 mrg
18 1.1.1.2 mrg or
19 1.1.1.2 mrg
20 1.1.1.2 mrg * the GNU General Public License as published by the Free Software
21 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any
22 1.1.1.2 mrg later version.
23 1.1.1.2 mrg
24 1.1.1.2 mrg or both in parallel, as here.
25 1.1 mrg
26 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
27 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 1.1.1.2 mrg for more details.
30 1.1 mrg
31 1.1.1.2 mrg You should have received copies of the GNU General Public License and the
32 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
33 1.1.1.2 mrg see https://www.gnu.org/licenses/. */
34 1.1 mrg
35 1.1 mrg #define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */
36 1.1 mrg
37 1.1.1.2 mrg #include "config.h" /* needed for the HAVE_, could also move gmp incls */
38 1.1 mrg
39 1.1 mrg #include <stdarg.h>
40 1.1 mrg #include <ctype.h>
41 1.1 mrg #include <stddef.h> /* for ptrdiff_t */
42 1.1 mrg #include <stdio.h>
43 1.1 mrg #include <stdlib.h> /* for strtol */
44 1.1 mrg #include <string.h>
45 1.1 mrg
46 1.1 mrg #if HAVE_LANGINFO_H
47 1.1 mrg #include <langinfo.h> /* for nl_langinfo */
48 1.1 mrg #endif
49 1.1 mrg
50 1.1 mrg #if HAVE_LOCALE_H
51 1.1 mrg #include <locale.h> /* for localeconv */
52 1.1 mrg #endif
53 1.1 mrg
54 1.1 mrg #if HAVE_INTTYPES_H
55 1.1 mrg # include <inttypes.h> /* for intmax_t */
56 1.1 mrg #else
57 1.1 mrg # if HAVE_STDINT_H
58 1.1 mrg # include <stdint.h>
59 1.1 mrg # endif
60 1.1 mrg #endif
61 1.1 mrg
62 1.1 mrg #if HAVE_SYS_TYPES_H
63 1.1 mrg #include <sys/types.h> /* for quad_t */
64 1.1 mrg #endif
65 1.1 mrg
66 1.1 mrg #include "gmp-impl.h"
67 1.1 mrg
68 1.1 mrg
69 1.1 mrg /* Change this to "#define TRACE(x) x" for some traces. */
70 1.1 mrg #define TRACE(x)
71 1.1 mrg
72 1.1 mrg
73 1.1 mrg /* General:
74 1.1 mrg
75 1.1 mrg It's necessary to parse up the format string to recognise the GMP
76 1.1 mrg extra types F, Q and Z. Other types and conversions are passed
77 1.1 mrg across to the standard sscanf or fscanf via funs->scan, for ease of
78 1.1 mrg implementation. This is essential in the case of something like glibc
79 1.1 mrg %p where the pointer format isn't actually documented.
80 1.1 mrg
81 1.1 mrg Because funs->scan doesn't get the whole input it can't put the right
82 1.1 mrg values in for %n, so that's handled in __gmp_doscan. Neither sscanf
83 1.1 mrg nor fscanf directly indicate how many characters were read, so an
84 1.1 mrg extra %n is appended to each run for that. For fscanf this merely
85 1.1 mrg supports our %n output, but for sscanf it lets funs->step move us
86 1.1 mrg along the input string.
87 1.1 mrg
88 1.1 mrg Whitespace and literal matches in the format string, including %%,
89 1.1 mrg are handled directly within __gmp_doscan. This is reasonably
90 1.1 mrg efficient, and avoids some suspicious behaviour observed in various
91 1.1 mrg system libc's. GLIBC 2.2.4 for instance returns 0 on
92 1.1 mrg
93 1.1 mrg sscanf(" ", " x")
94 1.1 mrg or
95 1.1 mrg sscanf(" ", " x%d",&n)
96 1.1 mrg
97 1.1 mrg whereas we think they should return EOF, since end-of-string is
98 1.1 mrg reached when a match of "x" is required.
99 1.1 mrg
100 1.1 mrg For standard % conversions, funs->scan is called once for each
101 1.1 mrg conversion. If we had vfscanf and vsscanf and could rely on their
102 1.1 mrg fixed text matching behaviour then we could call them with multiple
103 1.1 mrg consecutive standard conversions. But plain fscanf and sscanf work
104 1.1 mrg fine, and parsing one field at a time shouldn't be too much of a
105 1.1 mrg slowdown.
106 1.1 mrg
107 1.1 mrg gmpscan:
108 1.1 mrg
109 1.1 mrg gmpscan reads a gmp type. It's only used from one place, but is a
110 1.1 mrg separate subroutine to avoid a big chunk of complicated code in the
111 1.1 mrg middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it
112 1.1 mrg possible to share code for parsing integers, rationals and floats.
113 1.1 mrg
114 1.1 mrg In gmpscan normally one char of lookahead is maintained, but when width
115 1.1 mrg is reached that stops, on the principle that an fgetc/ungetc of a char
116 1.1 mrg past where we're told to stop would be undesirable. "chars" is how many
117 1.1 mrg characters have been read so far, including the current c. When
118 1.1 mrg chars==width and another character is desired then a jump is done to the
119 1.1 mrg "convert" stage. c is invalid and mustn't be unget'ed in this case;
120 1.1 mrg chars is set to width+1 to indicate that.
121 1.1 mrg
122 1.1 mrg gmpscan normally returns the number of characters read. -1 means an
123 1.1 mrg invalid field, -2 means EOF reached before any matching characters
124 1.1 mrg were read.
125 1.1 mrg
126 1.1 mrg For hex floats, the mantissa part is passed to mpf_set_str, then the
127 1.1 mrg exponent is applied with mpf_mul_2exp or mpf_div_2exp. This is easier
128 1.1 mrg than teaching mpf_set_str about an exponent factor (ie. 2) differing
129 1.1 mrg from the mantissa radix point factor (ie. 16). mpf_mul_2exp and
130 1.1 mrg mpf_div_2exp will preserve the application requested precision, so
131 1.1 mrg nothing in that respect is lost by making this a two-step process.
132 1.1 mrg
133 1.1 mrg Matching and errors:
134 1.1 mrg
135 1.1 mrg C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
136 1.1 mrg string which is a match for the appropriate type, or a prefix of a
137 1.1 mrg match. With that done, if it's only a prefix then the result is a
138 1.1 mrg matching failure, ie. invalid input.
139 1.1 mrg
140 1.1 mrg This rule seems fairly clear, but doesn't seem to be universally
141 1.1 mrg applied in system C libraries. Even GLIBC doesn't seem to get it
142 1.1 mrg right, insofar as it seems to accept some apparently invalid forms.
143 1.1 mrg Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
144 1.1 mrg standard would suggest a non-empty sequence of digits should be
145 1.1 mrg required after an "0x".
146 1.1 mrg
147 1.1 mrg A footnote to 7.19.6.2 para 17 notes how this input item reading can
148 1.1 mrg mean inputs acceptable to strtol are not acceptable to fscanf. We
149 1.1 mrg think this confirms our reading of "0x" as invalid.
150 1.1 mrg
151 1.1 mrg Clearly gmp_sscanf could backtrack to a longest input which was a
152 1.1 mrg valid match for a given item, but this is not done, since C99 says
153 1.1 mrg sscanf is identical to fscanf, so we make gmp_sscanf identical to
154 1.1 mrg gmp_fscanf.
155 1.1 mrg
156 1.1 mrg Types:
157 1.1 mrg
158 1.1 mrg C99 says "ll" is for long long, and "L" is for long double floats.
159 1.1 mrg Unfortunately in GMP 4.1.1 we documented the two as equivalent. This
160 1.1 mrg doesn't affect us directly, since both are passed through to plain
161 1.1 mrg scanf. It seems wisest not to try to enforce the C99 rule. This is
162 1.1 mrg consistent with what we said before, though whether it actually
163 1.1 mrg worked was always up to the C library.
164 1.1 mrg
165 1.1 mrg Alternatives:
166 1.1 mrg
167 1.1 mrg Consideration was given to using separate code for gmp_fscanf and
168 1.1 mrg gmp_sscanf. The sscanf case could zip across a string doing literal
169 1.1 mrg matches or recognising digits in gmpscan, rather than making a
170 1.1 mrg function call funs->get per character. The fscanf could use getc
171 1.1 mrg rather than fgetc too, which might help those systems where getc is a
172 1.1 mrg macro or otherwise inlined. But none of this scanning and converting
173 1.1 mrg will be particularly fast, so the two are done together to keep it a
174 1.1 mrg little simpler for now.
175 1.1 mrg
176 1.1 mrg Various multibyte string issues are not addressed, for a start C99
177 1.1 mrg scanf says the format string is multibyte. Since we pass %c, %s and
178 1.1 mrg %[ to the system scanf, they might do multibyte reads already, but
179 1.1 mrg it's another matter whether or not that can be used, since our digit
180 1.1 mrg and whitespace parsing is only unibyte. The plan is to quietly
181 1.1 mrg ignore multibyte locales for now. This is not as bad as it sounds,
182 1.1 mrg since GMP is presumably used mostly on numbers, which can be
183 1.1 mrg perfectly adequately treated in plain ASCII.
184 1.1 mrg
185 1.1 mrg */
186 1.1 mrg
187 1.1 mrg
/* Description of a single "%" conversion.  __gmp_doscan fills this in
   while walking the format string and hands it to gmpscan for the GMP
   types.  */
struct gmp_doscan_params_t
{
  /* Radix for numeric conversions: 10 for d/u, 8 for o, 16 for x/X, and 0
     (decide from the input) for e, f, g and i.  */
  int   base;

  /* Non-zero when "*" was given, ie. parse the field but store nothing.  */
  int   ignore;

  /* Type modifier character ('F', 'Q', 'Z', 'h', 'l', ...), or '\0' when
     no modifier has been seen.  'H' and 'L' double as the internal codes
     for "hh" and "ll".  */
  char  type;

  /* Maximum field width from the format, 0 meaning none was given.  */
  int   width;
};
194 1.1 mrg
195 1.1 mrg
/* Growth increment, in chars, for the buffer that STORE appends to.  */
#define S_ALLOC_STEP  512

/* Fetch the next input character into "c", counting it in "chars".

   Relies on the enclosing function having locals "chars", "width", "funs"
   and "data", plus a "convert" label.  Once "width" characters have been
   counted no further read is made: "chars" is left at width+1, "c" keeps
   its previous value, and control jumps to "convert".  */
#define GET(c)                                  \
  do {                                          \
    ASSERT (chars <= width);                    \
    chars++;                                    \
    if (chars > width)                          \
      goto convert;                             \
    (c) = (*funs->get) (data);                  \
  } while (0)

/* Append "c" to the buffer "s", extending it by S_ALLOC_STEP if necessary.

   Relies on the enclosing function having locals "s" (the buffer),
   "s_upto" (chars stored so far) and "s_alloc" (current allocation).  The
   argument is parenthesised so any expression can be passed safely, the
   same as in GET.  */
#define STORE(c)                                                        \
  do {                                                                  \
    ASSERT (s_upto <= s_alloc);                                         \
    if (s_upto >= s_alloc)                                              \
      {                                                                 \
        size_t  s_alloc_new = s_alloc + S_ALLOC_STEP;                   \
        s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
        s_alloc = s_alloc_new;                                          \
      }                                                                 \
    s[s_upto++] = (c);                                                  \
  } while (0)
219 1.1 mrg
/* Read one GMP-typed field (p->type is 'F', 'Q' or 'Z') through funs->get.

   The accepted characters are accumulated in a heap buffer "s" and then,
   unless p->ignore is set, converted into *dst with mpf_set_str,
   mpq_set_str or mpz_set_str respectively.

   Return value: the number of characters consumed, or -1 if the field is
   invalid, or -2 if the very first funs->get returned EOF.

   One character of lookahead is held in "c".  At "done" it is handed back
   with funs->unget, except when the width limit stopped reading (chars ==
   width+1), in which case nothing past the limit was fetched.  */
220 1.1 mrg static int
221 1.1 mrg gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
222 1.1 mrg const struct gmp_doscan_params_t *p, void *dst)
223 1.1 mrg {
224 1.1 mrg int chars, c, base, first, width, seen_point, seen_digit, hexfloat;
225 1.1 mrg size_t s_upto, s_alloc, hexexp;
226 1.1 mrg char *s;
227 1.1 mrg int invalid = 0;
228 1.1 mrg
229 1.1 mrg TRACE (printf ("gmpscan\n"));
230 1.1 mrg
231 1.1 mrg ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
232 1.1 mrg
/* Prime the lookahead.  Nothing has been allocated yet, so an immediate
   EOF can simply return.  */
233 1.1 mrg c = (*funs->get) (data);
234 1.1 mrg if (c == EOF)
235 1.1 mrg return -2;
236 1.1 mrg
/* "first" is 1 while parsing the mantissa or numerator, and is cleared at
   do_second before the exponent or denominator.  A p->width of 0 is
   replaced by INT_MAX-1.  */
237 1.1 mrg chars = 1;
238 1.1 mrg first = 1;
239 1.1 mrg seen_point = 0;
240 1.1 mrg width = (p->width == 0 ? INT_MAX-1 : p->width);
241 1.1 mrg base = p->base;
242 1.1 mrg s_alloc = S_ALLOC_STEP;
243 1.1 mrg s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
244 1.1 mrg s_upto = 0;
245 1.1 mrg hexfloat = 0;
246 1.1 mrg hexexp = 0;
247 1.1 mrg
/* Start of a numeric part: optional sign, optional base prefix, digits.
   Entered once for the mantissa/numerator and again, via do_second, for
   an exponent or denominator.  */
248 1.1 mrg another:
249 1.1 mrg seen_digit = 0;
250 1.1 mrg if (c == '-')
251 1.1 mrg {
252 1.1 mrg STORE (c);
253 1.1 mrg goto get_for_sign;
254 1.1 mrg }
255 1.1 mrg else if (c == '+')
256 1.1 mrg {
257 1.1 mrg /* don't store '+', it's not accepted by mpz_set_str etc */
258 1.1 mrg get_for_sign:
259 1.1 mrg GET (c);
260 1.1 mrg }
261 1.1 mrg
/* Base not fixed by the conversion: look for a leading "0" or "0x". */
262 1.1 mrg if (base == 0)
263 1.1 mrg {
264 1.1 mrg base = 10; /* decimal if no base indicator */
265 1.1 mrg if (c == '0')
266 1.1 mrg {
267 1.1 mrg seen_digit = 1; /* 0 alone is a valid number */
268 1.1 mrg if (p->type != 'F')
269 1.1 mrg base = 8; /* leading 0 is octal, for non-floats */
270 1.1 mrg STORE (c);
271 1.1 mrg GET (c);
272 1.1 mrg if (c == 'x' || c == 'X')
273 1.1 mrg {
274 1.1 mrg base = 16;
275 1.1 mrg seen_digit = 0; /* must have digits after an 0x */
276 1.1 mrg if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
277 1.1 mrg hexfloat = 1;
278 1.1 mrg else
279 1.1 mrg STORE (c);
280 1.1 mrg GET (c);
281 1.1 mrg }
282 1.1 mrg }
283 1.1 mrg }
284 1.1 mrg
/* Accumulate a run of digits valid in the current base.  Also re-entered
   after a decimal point to collect the fraction digits.  */
285 1.1 mrg digits:
286 1.1 mrg for (;;)
287 1.1 mrg {
288 1.1 mrg if (base == 16)
289 1.1 mrg {
290 1.1 mrg if (! isxdigit (c))
291 1.1 mrg break;
292 1.1 mrg }
293 1.1 mrg else
294 1.1 mrg {
295 1.1 mrg if (! isdigit (c))
296 1.1 mrg break;
297 1.1 mrg if (base == 8 && (c == '8' || c == '9'))
298 1.1 mrg break;
299 1.1 mrg }
300 1.1 mrg
301 1.1 mrg seen_digit = 1;
302 1.1 mrg STORE (c);
303 1.1 mrg GET (c);
304 1.1 mrg }
305 1.1 mrg
/* A decimal point, exponent or denominator is only looked for after the
   first part; after the second part we fall straight to convert.  */
306 1.1 mrg if (first)
307 1.1 mrg {
308 1.1 mrg /* decimal point */
309 1.1 mrg if (p->type == 'F' && ! seen_point)
310 1.1 mrg {
311 1.1 mrg /* For a multi-character decimal point, if the first character is
312 1.1 mrg present then all of it must be, otherwise the input is
313 1.1 mrg considered invalid. */
314 1.1 mrg const char *point = GMP_DECIMAL_POINT;
315 1.1 mrg int pc = (unsigned char) *point++;
316 1.1 mrg if (c == pc)
317 1.1 mrg {
318 1.1 mrg for (;;)
319 1.1 mrg {
320 1.1 mrg STORE (c);
321 1.1 mrg GET (c);
322 1.1 mrg pc = (unsigned char) *point++;
323 1.1 mrg if (pc == '\0')
324 1.1 mrg break;
325 1.1 mrg if (c != pc)
326 1.1 mrg goto set_invalid;
327 1.1 mrg }
328 1.1 mrg seen_point = 1;
329 1.1 mrg goto digits;
330 1.1 mrg }
331 1.1 mrg }
332 1.1 mrg
333 1.1 mrg /* exponent */
334 1.1 mrg if (p->type == 'F')
335 1.1 mrg {
336 1.1 mrg if (hexfloat && (c == 'p' || c == 'P'))
337 1.1 mrg {
338 1.1 mrg hexexp = s_upto; /* exponent location */
339 1.1 mrg base = 10; /* exponent in decimal */
340 1.1 mrg goto exponent;
341 1.1 mrg }
342 1.1 mrg else if (! hexfloat && (c == 'e' || c == 'E'))
343 1.1 mrg {
344 1.1 mrg exponent:
345 1.1 mrg /* must have at least one digit in the mantissa, just an exponent
346 1.1 mrg is not good enough */
347 1.1 mrg if (! seen_digit)
348 1.1 mrg goto set_invalid;
349 1.1 mrg
/* Shared by exponent and denominator: store the separator character
   ('e', 'p' or '/') and go round again for the second number.  */
350 1.1 mrg do_second:
351 1.1 mrg first = 0;
352 1.1 mrg STORE (c);
353 1.1 mrg GET (c);
354 1.1 mrg goto another;
355 1.1 mrg }
356 1.1 mrg }
357 1.1 mrg
358 1.1 mrg /* denominator */
359 1.1 mrg if (p->type == 'Q' && c == '/')
360 1.1 mrg {
361 1.1 mrg /* must have at least one digit in the numerator */
362 1.1 mrg if (! seen_digit)
363 1.1 mrg goto set_invalid;
364 1.1 mrg
365 1.1 mrg /* now look for at least one digit in the denominator */
366 1.1 mrg seen_digit = 0;
367 1.1 mrg
368 1.1 mrg /* allow the base to be redetermined for "%i" */
369 1.1 mrg base = p->base;
370 1.1 mrg goto do_second;
371 1.1 mrg }
372 1.1 mrg }
373 1.1 mrg
/* Reached by falling through, or from GET when the width limit is hit.
   The most recently parsed part must have contained a digit.  */
374 1.1 mrg convert:
375 1.1 mrg if (! seen_digit)
376 1.1 mrg {
377 1.1 mrg set_invalid:
378 1.1 mrg invalid = 1;
379 1.1 mrg goto done;
380 1.1 mrg }
381 1.1 mrg
382 1.1 mrg if (! p->ignore)
383 1.1 mrg {
384 1.1 mrg STORE ('\0');
385 1.1 mrg TRACE (printf (" convert \"%s\"\n", s));
386 1.1 mrg
387 1.1 mrg /* We ought to have parsed out a valid string above, so just test
388 1.1 mrg mpz_set_str etc with an ASSERT. */
389 1.1 mrg switch (p->type) {
390 1.1 mrg case 'F':
391 1.1 mrg {
392 1.1 mrg mpf_ptr f = (mpf_ptr) dst;
/* For a hex float with an exponent, cut the string at the stored 'p' so
   mpf_set_str sees only the mantissa; the decimal exponent that follows
   the 'p' is then applied as a power of 2.  */
393 1.1 mrg if (hexexp != 0)
394 1.1 mrg s[hexexp] = '\0';
395 1.1 mrg ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
396 1.1 mrg if (hexexp != 0)
397 1.1 mrg {
398 1.1 mrg char *dummy;
399 1.1 mrg long exp;
400 1.1 mrg exp = strtol (s + hexexp + 1, &dummy, 10);
401 1.1 mrg if (exp >= 0)
402 1.1 mrg mpf_mul_2exp (f, f, (unsigned long) exp);
403 1.1 mrg else
404 1.1.1.3 mrg mpf_div_2exp (f, f, NEG_CAST (unsigned long, exp));
405 1.1 mrg }
406 1.1 mrg }
407 1.1 mrg break;
408 1.1 mrg case 'Q':
409 1.1 mrg ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
410 1.1 mrg break;
411 1.1 mrg case 'Z':
412 1.1 mrg ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
413 1.1 mrg break;
414 1.1 mrg default:
415 1.1 mrg ASSERT (0);
416 1.1 mrg /*FALLTHRU*/
417 1.1 mrg break;
418 1.1 mrg }
419 1.1 mrg }
420 1.1 mrg
/* Common exit.  Hand back the lookahead character unless reading stopped
   at the width limit, then take one off "chars" so it counts only the
   characters actually consumed.  */
421 1.1 mrg done:
422 1.1 mrg ASSERT (chars <= width+1);
423 1.1 mrg if (chars != width+1)
424 1.1 mrg {
425 1.1 mrg (*funs->unget) (c, data);
426 1.1 mrg TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1));
427 1.1 mrg }
428 1.1 mrg chars--;
429 1.1 mrg
430 1.1 mrg (*__gmp_free_func) (s, s_alloc);
431 1.1 mrg
432 1.1 mrg if (invalid)
433 1.1 mrg {
434 1.1 mrg TRACE (printf (" invalid\n"));
435 1.1 mrg return -1;
436 1.1 mrg }
437 1.1 mrg
438 1.1 mrg TRACE (printf (" return %d chars (cf width %d)\n", chars, width));
439 1.1 mrg return chars;
440 1.1 mrg }
441 1.1 mrg
442 1.1 mrg
443 1.1 mrg /* Read and discard whitespace, if any. Return number of chars skipped.
444 1.1 mrg Whitespace skipping never provokes the EOF return from __gmp_doscan, so
445 1.1 mrg it's not necessary to watch for EOF from funs->get, */
446 1.1 mrg static int
447 1.1 mrg skip_white (const struct gmp_doscan_funs_t *funs, void *data)
448 1.1 mrg {
449 1.1 mrg int c;
450 1.1 mrg int ret = 0;
451 1.1 mrg
452 1.1 mrg do
453 1.1 mrg {
454 1.1 mrg c = (funs->get) (data);
455 1.1 mrg ret++;
456 1.1 mrg }
457 1.1 mrg while (isspace (c));
458 1.1 mrg
459 1.1 mrg (funs->unget) (c, data);
460 1.1 mrg ret--;
461 1.1 mrg
462 1.1 mrg TRACE (printf (" skip white %d\n", ret));
463 1.1 mrg return ret;
464 1.1 mrg }
465 1.1 mrg
466 1.1 mrg
467 1.1 mrg int
468 1.1 mrg __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
469 1.1 mrg const char *orig_fmt, va_list orig_ap)
470 1.1 mrg {
471 1.1 mrg struct gmp_doscan_params_t param;
472 1.1 mrg va_list ap;
473 1.1 mrg char *alloc_fmt;
474 1.1 mrg const char *fmt, *this_fmt, *end_fmt;
475 1.1 mrg size_t orig_fmt_len, alloc_fmt_size, len;
476 1.1 mrg int new_fields, new_chars;
477 1.1 mrg char fchar;
478 1.1 mrg int fields = 0;
479 1.1 mrg int chars = 0;
480 1.1 mrg
481 1.1 mrg TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
482 1.1 mrg if (funs->scan == (gmp_doscan_scan_t) sscanf)
483 1.1 mrg printf (" s=\"%s\"\n", * (const char **) data));
484 1.1 mrg
485 1.1 mrg /* Don't modify orig_ap, if va_list is actually an array and hence call by
486 1.1 mrg reference. It could be argued that it'd be more efficient to leave
487 1.1 mrg callers to make a copy if they care, but doing so here is going to be a
488 1.1 mrg very small part of the total work, and we may as well keep applications
489 1.1 mrg out of trouble. */
490 1.1 mrg va_copy (ap, orig_ap);
491 1.1 mrg
492 1.1 mrg /* Parts of the format string are going to be copied so that a " %n" can
493 1.1 mrg be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be
494 1.1 mrg needed if fmt consists of a single "%" specifier, but otherwise is an
495 1.1 mrg overestimate. We're not going to be very fast here, so use
496 1.1 mrg __gmp_allocate_func rather than TMP_ALLOC. */
497 1.1 mrg orig_fmt_len = strlen (orig_fmt);
498 1.1 mrg alloc_fmt_size = orig_fmt_len + 4;
499 1.1 mrg alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
500 1.1 mrg
501 1.1 mrg fmt = orig_fmt;
502 1.1 mrg end_fmt = orig_fmt + orig_fmt_len;
503 1.1 mrg
504 1.1 mrg for (;;)
505 1.1 mrg {
506 1.1 mrg next:
507 1.1 mrg fchar = *fmt++;
508 1.1 mrg
509 1.1 mrg if (fchar == '\0')
510 1.1 mrg break;
511 1.1 mrg
512 1.1 mrg if (isspace (fchar))
513 1.1 mrg {
514 1.1 mrg chars += skip_white (funs, data);
515 1.1 mrg continue;
516 1.1 mrg }
517 1.1 mrg
518 1.1 mrg if (fchar != '%')
519 1.1 mrg {
520 1.1 mrg int c;
521 1.1 mrg literal:
522 1.1 mrg c = (funs->get) (data);
523 1.1 mrg if (c != fchar)
524 1.1 mrg {
525 1.1 mrg (funs->unget) (c, data);
526 1.1 mrg if (c == EOF)
527 1.1 mrg {
528 1.1 mrg eof_no_match:
529 1.1 mrg if (fields == 0)
530 1.1 mrg fields = EOF;
531 1.1 mrg }
532 1.1 mrg goto done;
533 1.1 mrg }
534 1.1 mrg chars++;
535 1.1 mrg continue;
536 1.1 mrg }
537 1.1 mrg
538 1.1 mrg param.type = '\0';
539 1.1 mrg param.base = 0; /* for e,f,g,i */
540 1.1 mrg param.ignore = 0;
541 1.1 mrg param.width = 0;
542 1.1 mrg
543 1.1 mrg this_fmt = fmt-1;
544 1.1 mrg TRACE (printf (" this_fmt \"%s\"\n", this_fmt));
545 1.1 mrg
546 1.1 mrg for (;;)
547 1.1 mrg {
548 1.1 mrg ASSERT (fmt <= end_fmt);
549 1.1 mrg
550 1.1 mrg fchar = *fmt++;
551 1.1 mrg switch (fchar) {
552 1.1 mrg
553 1.1 mrg case '\0': /* unterminated % sequence */
554 1.1 mrg ASSERT (0);
555 1.1 mrg goto done;
556 1.1 mrg
557 1.1 mrg case '%': /* literal % */
558 1.1 mrg goto literal;
559 1.1 mrg
560 1.1 mrg case '[': /* character range */
561 1.1 mrg fchar = *fmt++;
562 1.1 mrg if (fchar == '^')
563 1.1 mrg fchar = *fmt++;
564 1.1 mrg /* ']' allowed as the first char (possibly after '^') */
565 1.1 mrg if (fchar == ']')
566 1.1 mrg fchar = *fmt++;
567 1.1 mrg for (;;)
568 1.1 mrg {
569 1.1 mrg ASSERT (fmt <= end_fmt);
570 1.1 mrg if (fchar == '\0')
571 1.1 mrg {
572 1.1 mrg /* unterminated % sequence */
573 1.1 mrg ASSERT (0);
574 1.1 mrg goto done;
575 1.1 mrg }
576 1.1 mrg if (fchar == ']')
577 1.1 mrg break;
578 1.1 mrg fchar = *fmt++;
579 1.1 mrg }
580 1.1 mrg /*FALLTHRU*/
581 1.1 mrg case 'c': /* characters */
582 1.1 mrg case 's': /* string of non-whitespace */
583 1.1 mrg case 'p': /* pointer */
584 1.1 mrg libc_type:
585 1.1 mrg len = fmt - this_fmt;
586 1.1 mrg memcpy (alloc_fmt, this_fmt, len);
587 1.1 mrg alloc_fmt[len++] = '%';
588 1.1 mrg alloc_fmt[len++] = 'n';
589 1.1 mrg alloc_fmt[len] = '\0';
590 1.1 mrg
591 1.1 mrg TRACE (printf (" scan \"%s\"\n", alloc_fmt);
592 1.1 mrg if (funs->scan == (gmp_doscan_scan_t) sscanf)
593 1.1 mrg printf (" s=\"%s\"\n", * (const char **) data));
594 1.1 mrg
595 1.1 mrg new_chars = -1;
596 1.1 mrg if (param.ignore)
597 1.1 mrg {
598 1.1 mrg new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
599 1.1 mrg ASSERT (new_fields == 0 || new_fields == EOF);
600 1.1 mrg }
601 1.1 mrg else
602 1.1 mrg {
603 1.1 mrg void *arg = va_arg (ap, void *);
604 1.1 mrg new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
605 1.1 mrg ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
606 1.1 mrg
607 1.1 mrg if (new_fields == 0)
608 1.1 mrg goto done; /* invalid input */
609 1.1 mrg
610 1.1 mrg if (new_fields == 1)
611 1.1 mrg ASSERT (new_chars != -1);
612 1.1 mrg }
613 1.1 mrg TRACE (printf (" new_fields %d new_chars %d\n",
614 1.1 mrg new_fields, new_chars));
615 1.1 mrg
616 1.1 mrg if (new_fields == -1)
617 1.1 mrg goto eof_no_match; /* EOF before anything matched */
618 1.1 mrg
619 1.1 mrg /* Under param.ignore, when new_fields==0 we don't know if
620 1.1 mrg it's a successful match or an invalid field. new_chars
621 1.1 mrg won't have been assigned if it was an invalid field. */
622 1.1 mrg if (new_chars == -1)
623 1.1 mrg goto done; /* invalid input */
624 1.1 mrg
625 1.1 mrg chars += new_chars;
626 1.1 mrg (*funs->step) (data, new_chars);
627 1.1 mrg
628 1.1 mrg increment_fields:
629 1.1 mrg if (! param.ignore)
630 1.1 mrg fields++;
631 1.1 mrg goto next;
632 1.1 mrg
633 1.1 mrg case 'd': /* decimal */
634 1.1 mrg case 'u': /* decimal */
635 1.1 mrg param.base = 10;
636 1.1 mrg goto numeric;
637 1.1 mrg
638 1.1 mrg case 'e': /* float */
639 1.1 mrg case 'E': /* float */
640 1.1 mrg case 'f': /* float */
641 1.1 mrg case 'g': /* float */
642 1.1 mrg case 'G': /* float */
643 1.1 mrg case 'i': /* integer with base marker */
644 1.1 mrg numeric:
645 1.1 mrg if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
646 1.1 mrg goto libc_type;
647 1.1 mrg
648 1.1 mrg chars += skip_white (funs, data);
649 1.1 mrg
650 1.1 mrg new_chars = gmpscan (funs, data, ¶m,
651 1.1 mrg param.ignore ? NULL : va_arg (ap, void*));
652 1.1 mrg if (new_chars == -2)
653 1.1 mrg goto eof_no_match;
654 1.1 mrg if (new_chars == -1)
655 1.1 mrg goto done;
656 1.1 mrg
657 1.1 mrg ASSERT (new_chars >= 0);
658 1.1 mrg chars += new_chars;
659 1.1 mrg goto increment_fields;
660 1.1 mrg
661 1.1 mrg case 'a': /* glibc allocate string */
662 1.1 mrg case '\'': /* glibc digit groupings */
663 1.1 mrg break;
664 1.1 mrg
665 1.1 mrg case 'F': /* mpf_t */
666 1.1 mrg case 'j': /* intmax_t */
667 1.1 mrg case 'L': /* long long */
668 1.1 mrg case 'q': /* quad_t */
669 1.1 mrg case 'Q': /* mpq_t */
670 1.1 mrg case 't': /* ptrdiff_t */
671 1.1 mrg case 'z': /* size_t */
672 1.1 mrg case 'Z': /* mpz_t */
673 1.1 mrg set_type:
674 1.1 mrg param.type = fchar;
675 1.1 mrg break;
676 1.1 mrg
677 1.1 mrg case 'h': /* short or char */
678 1.1 mrg if (param.type != 'h')
679 1.1 mrg goto set_type;
680 1.1 mrg param.type = 'H'; /* internal code for "hh" */
681 1.1 mrg break;
682 1.1 mrg
683 1.1 mrg goto numeric;
684 1.1 mrg
685 1.1 mrg case 'l': /* long, long long, double or long double */
686 1.1 mrg if (param.type != 'l')
687 1.1 mrg goto set_type;
688 1.1 mrg param.type = 'L'; /* "ll" means "L" */
689 1.1 mrg break;
690 1.1 mrg
691 1.1 mrg case 'n':
692 1.1 mrg if (! param.ignore)
693 1.1 mrg {
694 1.1 mrg void *p;
695 1.1 mrg p = va_arg (ap, void *);
696 1.1 mrg TRACE (printf (" store %%n to %p\n", p));
697 1.1 mrg switch (param.type) {
698 1.1 mrg case '\0': * (int *) p = chars; break;
699 1.1 mrg case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break;
700 1.1 mrg case 'H': * (char *) p = chars; break;
701 1.1 mrg case 'h': * (short *) p = chars; break;
702 1.1 mrg #if HAVE_INTMAX_T
703 1.1 mrg case 'j': * (intmax_t *) p = chars; break;
704 1.1 mrg #else
705 1.1 mrg case 'j': ASSERT_FAIL (intmax_t not available); break;
706 1.1 mrg #endif
707 1.1 mrg case 'l': * (long *) p = chars; break;
708 1.1 mrg #if HAVE_QUAD_T && HAVE_LONG_LONG
709 1.1 mrg case 'q':
710 1.1 mrg ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
711 1.1 mrg /*FALLTHRU*/
712 1.1 mrg #else
713 1.1 mrg case 'q': ASSERT_FAIL (quad_t not available); break;
714 1.1 mrg #endif
715 1.1 mrg #if HAVE_LONG_LONG
716 1.1 mrg case 'L': * (long long *) p = chars; break;
717 1.1 mrg #else
718 1.1 mrg case 'L': ASSERT_FAIL (long long not available); break;
719 1.1 mrg #endif
720 1.1 mrg case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
721 1.1 mrg #if HAVE_PTRDIFF_T
722 1.1 mrg case 't': * (ptrdiff_t *) p = chars; break;
723 1.1 mrg #else
724 1.1 mrg case 't': ASSERT_FAIL (ptrdiff_t not available); break;
725 1.1 mrg #endif
726 1.1 mrg case 'z': * (size_t *) p = chars; break;
727 1.1 mrg case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break;
728 1.1 mrg default: ASSERT (0); break;
729 1.1 mrg }
730 1.1 mrg }
731 1.1 mrg goto next;
732 1.1 mrg
733 1.1 mrg case 'o':
734 1.1 mrg param.base = 8;
735 1.1 mrg goto numeric;
736 1.1 mrg
737 1.1 mrg case 'x':
738 1.1 mrg case 'X':
739 1.1 mrg param.base = 16;
740 1.1 mrg goto numeric;
741 1.1 mrg
742 1.1 mrg case '0': case '1': case '2': case '3': case '4':
743 1.1 mrg case '5': case '6': case '7': case '8': case '9':
744 1.1 mrg param.width = 0;
745 1.1 mrg do {
746 1.1 mrg param.width = param.width * 10 + (fchar-'0');
747 1.1 mrg fchar = *fmt++;
748 1.1 mrg } while (isdigit (fchar));
749 1.1 mrg fmt--; /* unget the non-digit */
750 1.1 mrg break;
751 1.1 mrg
752 1.1 mrg case '*':
753 1.1 mrg param.ignore = 1;
754 1.1 mrg break;
755 1.1 mrg
756 1.1 mrg default:
757 1.1 mrg /* something invalid in a % sequence */
758 1.1 mrg ASSERT (0);
759 1.1 mrg goto next;
760 1.1 mrg }
761 1.1 mrg }
762 1.1 mrg }
763 1.1 mrg
764 1.1 mrg done:
765 1.1 mrg (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
766 1.1 mrg return fields;
767 1.1 mrg }
768