doscan.c revision 1.1.1.2 1 1.1 mrg /* __gmp_doscan -- formatted input internals.
2 1.1 mrg
3 1.1 mrg THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
4 1.1 mrg CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5 1.1 mrg FUTURE GNU MP RELEASES.
6 1.1 mrg
7 1.1.1.2 mrg Copyright 2001-2003 Free Software Foundation, Inc.
8 1.1 mrg
9 1.1 mrg This file is part of the GNU MP Library.
10 1.1 mrg
11 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify
12 1.1.1.2 mrg it under the terms of either:
13 1.1.1.2 mrg
14 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free
15 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your
16 1.1.1.2 mrg option) any later version.
17 1.1.1.2 mrg
18 1.1.1.2 mrg or
19 1.1.1.2 mrg
20 1.1.1.2 mrg * the GNU General Public License as published by the Free Software
21 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any
22 1.1.1.2 mrg later version.
23 1.1.1.2 mrg
24 1.1.1.2 mrg or both in parallel, as here.
25 1.1 mrg
26 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but
27 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 1.1.1.2 mrg for more details.
30 1.1 mrg
31 1.1.1.2 mrg You should have received copies of the GNU General Public License and the
32 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not,
33 1.1.1.2 mrg see https://www.gnu.org/licenses/. */
34 1.1 mrg
35 1.1 mrg #define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */
36 1.1 mrg
37 1.1.1.2 mrg #include "config.h" /* needed for the HAVE_, could also move gmp incls */
38 1.1 mrg
39 1.1 mrg #include <stdarg.h>
40 1.1 mrg #include <ctype.h>
41 1.1 mrg #include <stddef.h> /* for ptrdiff_t */
42 1.1 mrg #include <stdio.h>
43 1.1 mrg #include <stdlib.h> /* for strtol */
44 1.1 mrg #include <string.h>
45 1.1 mrg
46 1.1 mrg #if HAVE_LANGINFO_H
47 1.1 mrg #include <langinfo.h> /* for nl_langinfo */
48 1.1 mrg #endif
49 1.1 mrg
50 1.1 mrg #if HAVE_LOCALE_H
51 1.1 mrg #include <locale.h> /* for localeconv */
52 1.1 mrg #endif
53 1.1 mrg
54 1.1 mrg #if HAVE_INTTYPES_H
55 1.1 mrg # include <inttypes.h> /* for intmax_t */
56 1.1 mrg #else
57 1.1 mrg # if HAVE_STDINT_H
58 1.1 mrg # include <stdint.h>
59 1.1 mrg # endif
60 1.1 mrg #endif
61 1.1 mrg
62 1.1 mrg #if HAVE_SYS_TYPES_H
63 1.1 mrg #include <sys/types.h> /* for quad_t */
64 1.1 mrg #endif
65 1.1 mrg
66 1.1 mrg #include "gmp.h"
67 1.1 mrg #include "gmp-impl.h"
68 1.1 mrg
69 1.1 mrg
/* Debug tracing hook.  As shipped the argument is discarded entirely, so
   the trace statements cost nothing.  Redefine as "#define TRACE(x) x" to
   have them compiled in.  */
#define TRACE(x)
72 1.1 mrg
73 1.1 mrg
74 1.1 mrg /* General:
75 1.1 mrg
76 1.1 mrg It's necessary to parse up the format string to recognise the GMP
77 1.1 mrg extra types F, Q and Z. Other types and conversions are passed
78 1.1 mrg across to the standard sscanf or fscanf via funs->scan, for ease of
79 1.1 mrg implementation. This is essential in the case of something like glibc
80 1.1 mrg %p where the pointer format isn't actually documented.
81 1.1 mrg
82 1.1 mrg Because funs->scan doesn't get the whole input it can't put the right
83 1.1 mrg values in for %n, so that's handled in __gmp_doscan. Neither sscanf
84 1.1 mrg nor fscanf directly indicate how many characters were read, so an
85 1.1 mrg extra %n is appended to each run for that. For fscanf this merely
86 1.1 mrg supports our %n output, but for sscanf it lets funs->step move us
87 1.1 mrg along the input string.
88 1.1 mrg
89 1.1 mrg Whitespace and literal matches in the format string, including %%,
90 1.1 mrg are handled directly within __gmp_doscan. This is reasonably
91 1.1 mrg efficient, and avoids some suspicious behaviour observed in various
92 1.1 mrg system libc's. GLIBC 2.2.4 for instance returns 0 on
93 1.1 mrg
94 1.1 mrg sscanf(" ", " x")
95 1.1 mrg or
96 1.1 mrg sscanf(" ", " x%d",&n)
97 1.1 mrg
98 1.1 mrg whereas we think they should return EOF, since end-of-string is
99 1.1 mrg reached when a match of "x" is required.
100 1.1 mrg
101 1.1 mrg For standard % conversions, funs->scan is called once for each
102 1.1 mrg conversion. If we had vfscanf and vsscanf and could rely on their
103 1.1 mrg fixed text matching behaviour then we could call them with multiple
104 1.1 mrg consecutive standard conversions. But plain fscanf and sscanf work
105 1.1 mrg fine, and parsing one field at a time shouldn't be too much of a
106 1.1 mrg slowdown.
107 1.1 mrg
108 1.1 mrg gmpscan:
109 1.1 mrg
110 1.1 mrg gmpscan reads a gmp type. It's only used from one place, but is a
111 1.1 mrg separate subroutine to avoid a big chunk of complicated code in the
112 1.1 mrg middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it
113 1.1 mrg possible to share code for parsing integers, rationals and floats.
114 1.1 mrg
115 1.1 mrg In gmpscan normally one char of lookahead is maintained, but when width
116 1.1 mrg is reached that stops, on the principle that an fgetc/ungetc of a char
117 1.1 mrg past where we're told to stop would be undesirable. "chars" is how many
118 1.1 mrg characters have been read so far, including the current c. When
119 1.1 mrg chars==width and another character is desired then a jump is done to the
120 1.1 mrg "convert" stage. c is invalid and mustn't be unget'ed in this case;
121 1.1 mrg chars is set to width+1 to indicate that.
122 1.1 mrg
123 1.1 mrg gmpscan normally returns the number of characters read. -1 means an
124 1.1 mrg invalid field, -2 means EOF reached before any matching characters
125 1.1 mrg were read.
126 1.1 mrg
127 1.1 mrg For hex floats, the mantissa part is passed to mpf_set_str, then the
128 1.1 mrg exponent is applied with mpf_mul_2exp or mpf_div_2exp. This is easier
129 1.1 mrg than teaching mpf_set_str about an exponent factor (ie. 2) differing
130 1.1 mrg from the mantissa radix point factor (ie. 16). mpf_mul_2exp and
131 1.1 mrg mpf_div_2exp will preserve the application requested precision, so
132 1.1 mrg nothing in that respect is lost by making this a two-step process.
133 1.1 mrg
134 1.1 mrg Matching and errors:
135 1.1 mrg
136 1.1 mrg C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
137 1.1 mrg string which is a match for the appropriate type, or a prefix of a
138 1.1 mrg match. With that done, if it's only a prefix then the result is a
139 1.1 mrg matching failure, ie. invalid input.
140 1.1 mrg
141 1.1 mrg This rule seems fairly clear, but doesn't seem to be universally
142 1.1 mrg applied in system C libraries. Even GLIBC doesn't seem to get it
143 1.1 mrg right, insofar as it seems to accept some apparently invalid forms.
144 1.1 mrg Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
145 1.1 mrg standard would suggest a non-empty sequence of digits should be
146 1.1 mrg required after an "0x".
147 1.1 mrg
148 1.1 mrg A footnote to 7.19.6.2 para 17 notes how this input item reading can
149 1.1 mrg mean inputs acceptable to strtol are not acceptable to fscanf. We
150 1.1 mrg think this confirms our reading of "0x" as invalid.
151 1.1 mrg
152 1.1 mrg Clearly gmp_sscanf could backtrack to a longest input which was a
153 1.1 mrg valid match for a given item, but this is not done, since C99 says
154 1.1 mrg sscanf is identical to fscanf, so we make gmp_sscanf identical to
155 1.1 mrg gmp_fscanf.
156 1.1 mrg
157 1.1 mrg Types:
158 1.1 mrg
159 1.1 mrg C99 says "ll" is for long long, and "L" is for long double floats.
160 1.1 mrg Unfortunately in GMP 4.1.1 we documented the two as equivalent. This
161 1.1 mrg doesn't affect us directly, since both are passed through to plain
162 1.1 mrg scanf. It seems wisest not to try to enforce the C99 rule. This is
163 1.1 mrg consistent with what we said before, though whether it actually
164 1.1 mrg worked was always up to the C library.
165 1.1 mrg
166 1.1 mrg Alternatives:
167 1.1 mrg
168 1.1 mrg Consideration was given to using separate code for gmp_fscanf and
169 1.1 mrg gmp_sscanf. The sscanf case could zip across a string doing literal
170 1.1 mrg matches or recognising digits in gmpscan, rather than making a
171 1.1 mrg function call funs->get per character. The fscanf could use getc
172 1.1 mrg rather than fgetc too, which might help those systems where getc is a
173 1.1 mrg macro or otherwise inlined. But none of this scanning and converting
174 1.1 mrg will be particularly fast, so the two are done together to keep it a
175 1.1 mrg little simpler for now.
176 1.1 mrg
177 1.1 mrg Various multibyte string issues are not addressed, for a start C99
178 1.1 mrg scanf says the format string is multibyte. Since we pass %c, %s and
179 1.1 mrg %[ to the system scanf, they might do multibyte reads already, but
180 1.1 mrg it's another matter whether or not that can be used, since our digit
181 1.1 mrg and whitespace parsing is only unibyte. The plan is to quietly
182 1.1 mrg ignore multibyte locales for now. This is not as bad as it sounds,
183 1.1 mrg since GMP is presumably used mostly on numbers, which can be
184 1.1 mrg perfectly adequately treated in plain ASCII.
185 1.1 mrg
186 1.1 mrg */
187 1.1 mrg
188 1.1 mrg
/* Parsed state of a single "%" conversion, filled in by __gmp_doscan and
   read by gmpscan.  */
struct gmp_doscan_params_t {
  int   base;    /* radix for the digits; 0 means detect from a 0/0x prefix */
  int   ignore;  /* non-zero when "*" was given: parse but don't store */
  char  type;    /* type modifier, eg. 'F', 'Q', 'Z', 'l'; '\0' if none */
  int   width;   /* maximum field width; 0 means no limit given */
};
195 1.1 mrg
196 1.1 mrg
/* Fetch the next input character into "c", for use within gmpscan only.
   Relies on the locals "chars", "width", "funs" and "data" and on the
   "convert" label.  The character count is bumped first; if that takes it
   past the field width then nothing is read and control jumps to
   "convert", leaving chars == width+1 and "c" holding its old value.  */
#define GET(c)                                  \
  do {                                          \
    ASSERT (chars <= width);                    \
    if (++chars > width)                        \
      goto convert;                             \
    (c) = (*funs->get) (data);                  \
  } while (0)
205 1.1 mrg
/* Initial size of the gmpscan text buffer "s", and the amount it is grown
   by each time it fills up. */
#define S_ALLOC_STEP  512

/* Append character "c" to the buffer "s", for use within gmpscan only.
   Relies on the locals "s", "s_upto" (bytes used) and "s_alloc" (bytes
   allocated).  When the buffer is full it is first enlarged by
   S_ALLOC_STEP bytes.  */
#define STORE(c)                                                        \
  do {                                                                  \
    ASSERT (s_upto <= s_alloc);                                         \
    if (s_upto >= s_alloc)                                              \
      {                                                                 \
        size_t  grown = s_alloc + S_ALLOC_STEP;                         \
        s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, grown, char);       \
        s_alloc = grown;                                                \
      }                                                                 \
    s[s_upto] = (c);                                                    \
    s_upto++;                                                           \
  } while (0)
220 1.1 mrg
221 1.1 mrg static int
222 1.1 mrg gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
223 1.1 mrg const struct gmp_doscan_params_t *p, void *dst)
224 1.1 mrg {
225 1.1 mrg int chars, c, base, first, width, seen_point, seen_digit, hexfloat;
226 1.1 mrg size_t s_upto, s_alloc, hexexp;
227 1.1 mrg char *s;
228 1.1 mrg int invalid = 0;
229 1.1 mrg
230 1.1 mrg TRACE (printf ("gmpscan\n"));
231 1.1 mrg
232 1.1 mrg ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
233 1.1 mrg
234 1.1 mrg c = (*funs->get) (data);
235 1.1 mrg if (c == EOF)
236 1.1 mrg return -2;
237 1.1 mrg
238 1.1 mrg chars = 1;
239 1.1 mrg first = 1;
240 1.1 mrg seen_point = 0;
241 1.1 mrg width = (p->width == 0 ? INT_MAX-1 : p->width);
242 1.1 mrg base = p->base;
243 1.1 mrg s_alloc = S_ALLOC_STEP;
244 1.1 mrg s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
245 1.1 mrg s_upto = 0;
246 1.1 mrg hexfloat = 0;
247 1.1 mrg hexexp = 0;
248 1.1 mrg
249 1.1 mrg another:
250 1.1 mrg seen_digit = 0;
251 1.1 mrg if (c == '-')
252 1.1 mrg {
253 1.1 mrg STORE (c);
254 1.1 mrg goto get_for_sign;
255 1.1 mrg }
256 1.1 mrg else if (c == '+')
257 1.1 mrg {
258 1.1 mrg /* don't store '+', it's not accepted by mpz_set_str etc */
259 1.1 mrg get_for_sign:
260 1.1 mrg GET (c);
261 1.1 mrg }
262 1.1 mrg
263 1.1 mrg if (base == 0)
264 1.1 mrg {
265 1.1 mrg base = 10; /* decimal if no base indicator */
266 1.1 mrg if (c == '0')
267 1.1 mrg {
268 1.1 mrg seen_digit = 1; /* 0 alone is a valid number */
269 1.1 mrg if (p->type != 'F')
270 1.1 mrg base = 8; /* leading 0 is octal, for non-floats */
271 1.1 mrg STORE (c);
272 1.1 mrg GET (c);
273 1.1 mrg if (c == 'x' || c == 'X')
274 1.1 mrg {
275 1.1 mrg base = 16;
276 1.1 mrg seen_digit = 0; /* must have digits after an 0x */
277 1.1 mrg if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
278 1.1 mrg hexfloat = 1;
279 1.1 mrg else
280 1.1 mrg STORE (c);
281 1.1 mrg GET (c);
282 1.1 mrg }
283 1.1 mrg }
284 1.1 mrg }
285 1.1 mrg
286 1.1 mrg digits:
287 1.1 mrg for (;;)
288 1.1 mrg {
289 1.1 mrg if (base == 16)
290 1.1 mrg {
291 1.1 mrg if (! isxdigit (c))
292 1.1 mrg break;
293 1.1 mrg }
294 1.1 mrg else
295 1.1 mrg {
296 1.1 mrg if (! isdigit (c))
297 1.1 mrg break;
298 1.1 mrg if (base == 8 && (c == '8' || c == '9'))
299 1.1 mrg break;
300 1.1 mrg }
301 1.1 mrg
302 1.1 mrg seen_digit = 1;
303 1.1 mrg STORE (c);
304 1.1 mrg GET (c);
305 1.1 mrg }
306 1.1 mrg
307 1.1 mrg if (first)
308 1.1 mrg {
309 1.1 mrg /* decimal point */
310 1.1 mrg if (p->type == 'F' && ! seen_point)
311 1.1 mrg {
312 1.1 mrg /* For a multi-character decimal point, if the first character is
313 1.1 mrg present then all of it must be, otherwise the input is
314 1.1 mrg considered invalid. */
315 1.1 mrg const char *point = GMP_DECIMAL_POINT;
316 1.1 mrg int pc = (unsigned char) *point++;
317 1.1 mrg if (c == pc)
318 1.1 mrg {
319 1.1 mrg for (;;)
320 1.1 mrg {
321 1.1 mrg STORE (c);
322 1.1 mrg GET (c);
323 1.1 mrg pc = (unsigned char) *point++;
324 1.1 mrg if (pc == '\0')
325 1.1 mrg break;
326 1.1 mrg if (c != pc)
327 1.1 mrg goto set_invalid;
328 1.1 mrg }
329 1.1 mrg seen_point = 1;
330 1.1 mrg goto digits;
331 1.1 mrg }
332 1.1 mrg }
333 1.1 mrg
334 1.1 mrg /* exponent */
335 1.1 mrg if (p->type == 'F')
336 1.1 mrg {
337 1.1 mrg if (hexfloat && (c == 'p' || c == 'P'))
338 1.1 mrg {
339 1.1 mrg hexexp = s_upto; /* exponent location */
340 1.1 mrg base = 10; /* exponent in decimal */
341 1.1 mrg goto exponent;
342 1.1 mrg }
343 1.1 mrg else if (! hexfloat && (c == 'e' || c == 'E'))
344 1.1 mrg {
345 1.1 mrg exponent:
346 1.1 mrg /* must have at least one digit in the mantissa, just an exponent
347 1.1 mrg is not good enough */
348 1.1 mrg if (! seen_digit)
349 1.1 mrg goto set_invalid;
350 1.1 mrg
351 1.1 mrg do_second:
352 1.1 mrg first = 0;
353 1.1 mrg STORE (c);
354 1.1 mrg GET (c);
355 1.1 mrg goto another;
356 1.1 mrg }
357 1.1 mrg }
358 1.1 mrg
359 1.1 mrg /* denominator */
360 1.1 mrg if (p->type == 'Q' && c == '/')
361 1.1 mrg {
362 1.1 mrg /* must have at least one digit in the numerator */
363 1.1 mrg if (! seen_digit)
364 1.1 mrg goto set_invalid;
365 1.1 mrg
366 1.1 mrg /* now look for at least one digit in the denominator */
367 1.1 mrg seen_digit = 0;
368 1.1 mrg
369 1.1 mrg /* allow the base to be redetermined for "%i" */
370 1.1 mrg base = p->base;
371 1.1 mrg goto do_second;
372 1.1 mrg }
373 1.1 mrg }
374 1.1 mrg
375 1.1 mrg convert:
376 1.1 mrg if (! seen_digit)
377 1.1 mrg {
378 1.1 mrg set_invalid:
379 1.1 mrg invalid = 1;
380 1.1 mrg goto done;
381 1.1 mrg }
382 1.1 mrg
383 1.1 mrg if (! p->ignore)
384 1.1 mrg {
385 1.1 mrg STORE ('\0');
386 1.1 mrg TRACE (printf (" convert \"%s\"\n", s));
387 1.1 mrg
388 1.1 mrg /* We ought to have parsed out a valid string above, so just test
389 1.1 mrg mpz_set_str etc with an ASSERT. */
390 1.1 mrg switch (p->type) {
391 1.1 mrg case 'F':
392 1.1 mrg {
393 1.1 mrg mpf_ptr f = (mpf_ptr) dst;
394 1.1 mrg if (hexexp != 0)
395 1.1 mrg s[hexexp] = '\0';
396 1.1 mrg ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
397 1.1 mrg if (hexexp != 0)
398 1.1 mrg {
399 1.1 mrg char *dummy;
400 1.1 mrg long exp;
401 1.1 mrg exp = strtol (s + hexexp + 1, &dummy, 10);
402 1.1 mrg if (exp >= 0)
403 1.1 mrg mpf_mul_2exp (f, f, (unsigned long) exp);
404 1.1 mrg else
405 1.1 mrg mpf_div_2exp (f, f, - (unsigned long) exp);
406 1.1 mrg }
407 1.1 mrg }
408 1.1 mrg break;
409 1.1 mrg case 'Q':
410 1.1 mrg ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
411 1.1 mrg break;
412 1.1 mrg case 'Z':
413 1.1 mrg ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
414 1.1 mrg break;
415 1.1 mrg default:
416 1.1 mrg ASSERT (0);
417 1.1 mrg /*FALLTHRU*/
418 1.1 mrg break;
419 1.1 mrg }
420 1.1 mrg }
421 1.1 mrg
422 1.1 mrg done:
423 1.1 mrg ASSERT (chars <= width+1);
424 1.1 mrg if (chars != width+1)
425 1.1 mrg {
426 1.1 mrg (*funs->unget) (c, data);
427 1.1 mrg TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1));
428 1.1 mrg }
429 1.1 mrg chars--;
430 1.1 mrg
431 1.1 mrg (*__gmp_free_func) (s, s_alloc);
432 1.1 mrg
433 1.1 mrg if (invalid)
434 1.1 mrg {
435 1.1 mrg TRACE (printf (" invalid\n"));
436 1.1 mrg return -1;
437 1.1 mrg }
438 1.1 mrg
439 1.1 mrg TRACE (printf (" return %d chars (cf width %d)\n", chars, width));
440 1.1 mrg return chars;
441 1.1 mrg }
442 1.1 mrg
443 1.1 mrg
444 1.1 mrg /* Read and discard whitespace, if any. Return number of chars skipped.
445 1.1 mrg Whitespace skipping never provokes the EOF return from __gmp_doscan, so
446 1.1 mrg it's not necessary to watch for EOF from funs->get, */
447 1.1 mrg static int
448 1.1 mrg skip_white (const struct gmp_doscan_funs_t *funs, void *data)
449 1.1 mrg {
450 1.1 mrg int c;
451 1.1 mrg int ret = 0;
452 1.1 mrg
453 1.1 mrg do
454 1.1 mrg {
455 1.1 mrg c = (funs->get) (data);
456 1.1 mrg ret++;
457 1.1 mrg }
458 1.1 mrg while (isspace (c));
459 1.1 mrg
460 1.1 mrg (funs->unget) (c, data);
461 1.1 mrg ret--;
462 1.1 mrg
463 1.1 mrg TRACE (printf (" skip white %d\n", ret));
464 1.1 mrg return ret;
465 1.1 mrg }
466 1.1 mrg
467 1.1 mrg
468 1.1 mrg int
469 1.1 mrg __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
470 1.1 mrg const char *orig_fmt, va_list orig_ap)
471 1.1 mrg {
472 1.1 mrg struct gmp_doscan_params_t param;
473 1.1 mrg va_list ap;
474 1.1 mrg char *alloc_fmt;
475 1.1 mrg const char *fmt, *this_fmt, *end_fmt;
476 1.1 mrg size_t orig_fmt_len, alloc_fmt_size, len;
477 1.1 mrg int new_fields, new_chars;
478 1.1 mrg char fchar;
479 1.1 mrg int fields = 0;
480 1.1 mrg int chars = 0;
481 1.1 mrg
482 1.1 mrg TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
483 1.1 mrg if (funs->scan == (gmp_doscan_scan_t) sscanf)
484 1.1 mrg printf (" s=\"%s\"\n", * (const char **) data));
485 1.1 mrg
486 1.1 mrg /* Don't modify orig_ap, if va_list is actually an array and hence call by
487 1.1 mrg reference. It could be argued that it'd be more efficient to leave
488 1.1 mrg callers to make a copy if they care, but doing so here is going to be a
489 1.1 mrg very small part of the total work, and we may as well keep applications
490 1.1 mrg out of trouble. */
491 1.1 mrg va_copy (ap, orig_ap);
492 1.1 mrg
493 1.1 mrg /* Parts of the format string are going to be copied so that a " %n" can
494 1.1 mrg be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be
495 1.1 mrg needed if fmt consists of a single "%" specifier, but otherwise is an
496 1.1 mrg overestimate. We're not going to be very fast here, so use
497 1.1 mrg __gmp_allocate_func rather than TMP_ALLOC. */
498 1.1 mrg orig_fmt_len = strlen (orig_fmt);
499 1.1 mrg alloc_fmt_size = orig_fmt_len + 4;
500 1.1 mrg alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
501 1.1 mrg
502 1.1 mrg fmt = orig_fmt;
503 1.1 mrg end_fmt = orig_fmt + orig_fmt_len;
504 1.1 mrg
505 1.1 mrg for (;;)
506 1.1 mrg {
507 1.1 mrg next:
508 1.1 mrg fchar = *fmt++;
509 1.1 mrg
510 1.1 mrg if (fchar == '\0')
511 1.1 mrg break;
512 1.1 mrg
513 1.1 mrg if (isspace (fchar))
514 1.1 mrg {
515 1.1 mrg chars += skip_white (funs, data);
516 1.1 mrg continue;
517 1.1 mrg }
518 1.1 mrg
519 1.1 mrg if (fchar != '%')
520 1.1 mrg {
521 1.1 mrg int c;
522 1.1 mrg literal:
523 1.1 mrg c = (funs->get) (data);
524 1.1 mrg if (c != fchar)
525 1.1 mrg {
526 1.1 mrg (funs->unget) (c, data);
527 1.1 mrg if (c == EOF)
528 1.1 mrg {
529 1.1 mrg eof_no_match:
530 1.1 mrg if (fields == 0)
531 1.1 mrg fields = EOF;
532 1.1 mrg }
533 1.1 mrg goto done;
534 1.1 mrg }
535 1.1 mrg chars++;
536 1.1 mrg continue;
537 1.1 mrg }
538 1.1 mrg
539 1.1 mrg param.type = '\0';
540 1.1 mrg param.base = 0; /* for e,f,g,i */
541 1.1 mrg param.ignore = 0;
542 1.1 mrg param.width = 0;
543 1.1 mrg
544 1.1 mrg this_fmt = fmt-1;
545 1.1 mrg TRACE (printf (" this_fmt \"%s\"\n", this_fmt));
546 1.1 mrg
547 1.1 mrg for (;;)
548 1.1 mrg {
549 1.1 mrg ASSERT (fmt <= end_fmt);
550 1.1 mrg
551 1.1 mrg fchar = *fmt++;
552 1.1 mrg switch (fchar) {
553 1.1 mrg
554 1.1 mrg case '\0': /* unterminated % sequence */
555 1.1 mrg ASSERT (0);
556 1.1 mrg goto done;
557 1.1 mrg
558 1.1 mrg case '%': /* literal % */
559 1.1 mrg goto literal;
560 1.1 mrg
561 1.1 mrg case '[': /* character range */
562 1.1 mrg fchar = *fmt++;
563 1.1 mrg if (fchar == '^')
564 1.1 mrg fchar = *fmt++;
565 1.1 mrg /* ']' allowed as the first char (possibly after '^') */
566 1.1 mrg if (fchar == ']')
567 1.1 mrg fchar = *fmt++;
568 1.1 mrg for (;;)
569 1.1 mrg {
570 1.1 mrg ASSERT (fmt <= end_fmt);
571 1.1 mrg if (fchar == '\0')
572 1.1 mrg {
573 1.1 mrg /* unterminated % sequence */
574 1.1 mrg ASSERT (0);
575 1.1 mrg goto done;
576 1.1 mrg }
577 1.1 mrg if (fchar == ']')
578 1.1 mrg break;
579 1.1 mrg fchar = *fmt++;
580 1.1 mrg }
581 1.1 mrg /*FALLTHRU*/
582 1.1 mrg case 'c': /* characters */
583 1.1 mrg case 's': /* string of non-whitespace */
584 1.1 mrg case 'p': /* pointer */
585 1.1 mrg libc_type:
586 1.1 mrg len = fmt - this_fmt;
587 1.1 mrg memcpy (alloc_fmt, this_fmt, len);
588 1.1 mrg alloc_fmt[len++] = '%';
589 1.1 mrg alloc_fmt[len++] = 'n';
590 1.1 mrg alloc_fmt[len] = '\0';
591 1.1 mrg
592 1.1 mrg TRACE (printf (" scan \"%s\"\n", alloc_fmt);
593 1.1 mrg if (funs->scan == (gmp_doscan_scan_t) sscanf)
594 1.1 mrg printf (" s=\"%s\"\n", * (const char **) data));
595 1.1 mrg
596 1.1 mrg new_chars = -1;
597 1.1 mrg if (param.ignore)
598 1.1 mrg {
599 1.1 mrg new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
600 1.1 mrg ASSERT (new_fields == 0 || new_fields == EOF);
601 1.1 mrg }
602 1.1 mrg else
603 1.1 mrg {
604 1.1 mrg void *arg = va_arg (ap, void *);
605 1.1 mrg new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
606 1.1 mrg ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
607 1.1 mrg
608 1.1 mrg if (new_fields == 0)
609 1.1 mrg goto done; /* invalid input */
610 1.1 mrg
611 1.1 mrg if (new_fields == 1)
612 1.1 mrg ASSERT (new_chars != -1);
613 1.1 mrg }
614 1.1 mrg TRACE (printf (" new_fields %d new_chars %d\n",
615 1.1 mrg new_fields, new_chars));
616 1.1 mrg
617 1.1 mrg if (new_fields == -1)
618 1.1 mrg goto eof_no_match; /* EOF before anything matched */
619 1.1 mrg
620 1.1 mrg /* Under param.ignore, when new_fields==0 we don't know if
621 1.1 mrg it's a successful match or an invalid field. new_chars
622 1.1 mrg won't have been assigned if it was an invalid field. */
623 1.1 mrg if (new_chars == -1)
624 1.1 mrg goto done; /* invalid input */
625 1.1 mrg
626 1.1 mrg chars += new_chars;
627 1.1 mrg (*funs->step) (data, new_chars);
628 1.1 mrg
629 1.1 mrg increment_fields:
630 1.1 mrg if (! param.ignore)
631 1.1 mrg fields++;
632 1.1 mrg goto next;
633 1.1 mrg
634 1.1 mrg case 'd': /* decimal */
635 1.1 mrg case 'u': /* decimal */
636 1.1 mrg param.base = 10;
637 1.1 mrg goto numeric;
638 1.1 mrg
639 1.1 mrg case 'e': /* float */
640 1.1 mrg case 'E': /* float */
641 1.1 mrg case 'f': /* float */
642 1.1 mrg case 'g': /* float */
643 1.1 mrg case 'G': /* float */
644 1.1 mrg case 'i': /* integer with base marker */
645 1.1 mrg numeric:
646 1.1 mrg if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
647 1.1 mrg goto libc_type;
648 1.1 mrg
649 1.1 mrg chars += skip_white (funs, data);
650 1.1 mrg
651 1.1 mrg new_chars = gmpscan (funs, data, ¶m,
652 1.1 mrg param.ignore ? NULL : va_arg (ap, void*));
653 1.1 mrg if (new_chars == -2)
654 1.1 mrg goto eof_no_match;
655 1.1 mrg if (new_chars == -1)
656 1.1 mrg goto done;
657 1.1 mrg
658 1.1 mrg ASSERT (new_chars >= 0);
659 1.1 mrg chars += new_chars;
660 1.1 mrg goto increment_fields;
661 1.1 mrg
662 1.1 mrg case 'a': /* glibc allocate string */
663 1.1 mrg case '\'': /* glibc digit groupings */
664 1.1 mrg break;
665 1.1 mrg
666 1.1 mrg case 'F': /* mpf_t */
667 1.1 mrg case 'j': /* intmax_t */
668 1.1 mrg case 'L': /* long long */
669 1.1 mrg case 'q': /* quad_t */
670 1.1 mrg case 'Q': /* mpq_t */
671 1.1 mrg case 't': /* ptrdiff_t */
672 1.1 mrg case 'z': /* size_t */
673 1.1 mrg case 'Z': /* mpz_t */
674 1.1 mrg set_type:
675 1.1 mrg param.type = fchar;
676 1.1 mrg break;
677 1.1 mrg
678 1.1 mrg case 'h': /* short or char */
679 1.1 mrg if (param.type != 'h')
680 1.1 mrg goto set_type;
681 1.1 mrg param.type = 'H'; /* internal code for "hh" */
682 1.1 mrg break;
683 1.1 mrg
684 1.1 mrg goto numeric;
685 1.1 mrg
686 1.1 mrg case 'l': /* long, long long, double or long double */
687 1.1 mrg if (param.type != 'l')
688 1.1 mrg goto set_type;
689 1.1 mrg param.type = 'L'; /* "ll" means "L" */
690 1.1 mrg break;
691 1.1 mrg
692 1.1 mrg case 'n':
693 1.1 mrg if (! param.ignore)
694 1.1 mrg {
695 1.1 mrg void *p;
696 1.1 mrg p = va_arg (ap, void *);
697 1.1 mrg TRACE (printf (" store %%n to %p\n", p));
698 1.1 mrg switch (param.type) {
699 1.1 mrg case '\0': * (int *) p = chars; break;
700 1.1 mrg case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break;
701 1.1 mrg case 'H': * (char *) p = chars; break;
702 1.1 mrg case 'h': * (short *) p = chars; break;
703 1.1 mrg #if HAVE_INTMAX_T
704 1.1 mrg case 'j': * (intmax_t *) p = chars; break;
705 1.1 mrg #else
706 1.1 mrg case 'j': ASSERT_FAIL (intmax_t not available); break;
707 1.1 mrg #endif
708 1.1 mrg case 'l': * (long *) p = chars; break;
709 1.1 mrg #if HAVE_QUAD_T && HAVE_LONG_LONG
710 1.1 mrg case 'q':
711 1.1 mrg ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
712 1.1 mrg /*FALLTHRU*/
713 1.1 mrg #else
714 1.1 mrg case 'q': ASSERT_FAIL (quad_t not available); break;
715 1.1 mrg #endif
716 1.1 mrg #if HAVE_LONG_LONG
717 1.1 mrg case 'L': * (long long *) p = chars; break;
718 1.1 mrg #else
719 1.1 mrg case 'L': ASSERT_FAIL (long long not available); break;
720 1.1 mrg #endif
721 1.1 mrg case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
722 1.1 mrg #if HAVE_PTRDIFF_T
723 1.1 mrg case 't': * (ptrdiff_t *) p = chars; break;
724 1.1 mrg #else
725 1.1 mrg case 't': ASSERT_FAIL (ptrdiff_t not available); break;
726 1.1 mrg #endif
727 1.1 mrg case 'z': * (size_t *) p = chars; break;
728 1.1 mrg case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break;
729 1.1 mrg default: ASSERT (0); break;
730 1.1 mrg }
731 1.1 mrg }
732 1.1 mrg goto next;
733 1.1 mrg
734 1.1 mrg case 'o':
735 1.1 mrg param.base = 8;
736 1.1 mrg goto numeric;
737 1.1 mrg
738 1.1 mrg case 'x':
739 1.1 mrg case 'X':
740 1.1 mrg param.base = 16;
741 1.1 mrg goto numeric;
742 1.1 mrg
743 1.1 mrg case '0': case '1': case '2': case '3': case '4':
744 1.1 mrg case '5': case '6': case '7': case '8': case '9':
745 1.1 mrg param.width = 0;
746 1.1 mrg do {
747 1.1 mrg param.width = param.width * 10 + (fchar-'0');
748 1.1 mrg fchar = *fmt++;
749 1.1 mrg } while (isdigit (fchar));
750 1.1 mrg fmt--; /* unget the non-digit */
751 1.1 mrg break;
752 1.1 mrg
753 1.1 mrg case '*':
754 1.1 mrg param.ignore = 1;
755 1.1 mrg break;
756 1.1 mrg
757 1.1 mrg default:
758 1.1 mrg /* something invalid in a % sequence */
759 1.1 mrg ASSERT (0);
760 1.1 mrg goto next;
761 1.1 mrg }
762 1.1 mrg }
763 1.1 mrg }
764 1.1 mrg
765 1.1 mrg done:
766 1.1 mrg (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
767 1.1 mrg return fields;
768 1.1 mrg }
769