/*	$NetBSD: vfscanf.c,v 1.16 1997/07/13 20:15:35 christos Exp $	*/
2
3 /*-
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #if defined(LIBC_SCCS) && !defined(lint)
41 #if 0
42 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93";
43 #else
44 __RCSID("$NetBSD: vfscanf.c,v 1.16 1997/07/13 20:15:35 christos Exp $");
45 #endif
46 #endif /* LIBC_SCCS and not lint */
47
48 #include "namespace.h"
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <ctype.h>
52 #if __STDC__
53 #include <stdarg.h>
54 #else
55 #include <varargs.h>
56 #endif
57 #include "local.h"
58
59 #ifdef FLOATING_POINT
60 #include "floatio.h"
61 #endif
62
63 #define BUF 513 /* Size of buf[], the numeric scratch buffer: numeric fields are clamped to BUF - 1 characters plus the terminating NUL. */
64
65 /*
66 * Flags used during conversion.
   * These are OR-ed into the `flags' word of __svfscanf while a single
   * conversion specification is being parsed and executed.
67 */
68 #define LONG 0x01 /* l: long or double */
69 #define LONGDBL 0x02 /* L: long double (the value is still parsed by strtod, i.e. with double precision) */
70 #define SHORT 0x04 /* h: short */
71 #define QUAD 0x08 /* q or ll: quad */
72 #define SUPPRESS 0x10 /* *: suppress assignment */
73 #define POINTER 0x20 /* weird %p pointer (`fake hex') */
74 #define NOSKIP 0x40 /* do not skip blanks (set by %c and %[) */
75
76 /*
77 * The following are used in numeric conversions only:
78 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
79 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
   *
   * DPTOK/PFXOK and EXPOK/NZDIGITS share bit values; a given conversion
   * is either floating or integral, so only one meaning of each bit is
   * in use at a time.
80 */
81 #define SIGNOK 0x080 /* +/- is (still) legal */
82 #define NDIGITS 0x100 /* no digits detected */
83
84 #define DPTOK 0x200 /* (float) decimal point is still legal */
85 #define EXPOK 0x400 /* (float) exponent (e+3, etc) still legal */
86
87 #define PFXOK 0x200 /* (int) 0x prefix is (still) legal */
88 #define NZDIGITS 0x400 /* (int) no zero digits detected */
89
90 /*
91 * Conversion types.
   * Once the conversion character has been decoded, one of these
   * replaces it in `c' and selects the scanning code to run.
92 */
93 #define CT_CHAR 0 /* %c conversion */
94 #define CT_CCL 1 /* %[...] conversion */
95 #define CT_STRING 2 /* %s conversion */
96 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */
97 #define CT_FLOAT 4 /* floating, i.e., strtod */
98
/* Local shorthands for the unsigned types. */
99 #define u_char unsigned char
100 #define u_long unsigned long /* NOTE(review): no use of u_long is visible in this file -- confirm before removing */
101
/*
 * Scanset parser for %[...], defined at the end of this file.
 * NOTE(review): __P presumably comes from <sys/cdefs.h> and yields a
 * prototype only on ANSI compilers -- confirm.
 */
102 static u_char *__sccl __P((char *, u_char *));
103
104 /*
105 * vfscanf
106 */
107 int
108 __svfscanf(fp, fmt0, ap)
109 register FILE *fp;
110 char const *fmt0;
111 _BSD_VA_LIST_ ap;
112 {
113 register u_char *fmt = (u_char *)fmt0;
114 register int c; /* character from format, or conversion */
115 register size_t width; /* field width, or 0 */
116 register char *p; /* points into all kinds of strings */
117 register int n; /* handy integer */
118 register int flags; /* flags as defined above */
119 register char *p0; /* saves original value of p when necessary */
120 int nassigned; /* number of fields assigned */
121 int nread; /* number of characters consumed from fp */
122 int base; /* base argument to strtoq/strtouq */
123 u_quad_t (*ccfn) __P((const char *, char **, int));
124 /* conversion function (strtoq/strtouq) */
125 char ccltab[256]; /* character class table for %[...] */
126 char buf[BUF]; /* buffer for numeric conversions */
127
128 /* `basefix' is used to avoid `if' tests in the integer scanner */
129 static short basefix[17] =
130 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
131
132 nassigned = 0;
133 nread = 0;
134 base = 0; /* XXX just to keep gcc happy */
135 ccfn = NULL; /* XXX just to keep gcc happy */
136 for (;;) {
137 c = *fmt++;
138 if (c == 0)
139 return (nassigned);
140 if (isspace(c)) {
141 for (;;) {
142 if (fp->_r <= 0 && __srefill(fp))
143 return (nassigned);
144 if (!isspace(*fp->_p))
145 break;
146 nread++, fp->_r--, fp->_p++;
147 }
148 continue;
149 }
150 if (c != '%')
151 goto literal;
152 width = 0;
153 flags = 0;
154 /*
155 * switch on the format. continue if done;
156 * break once format type is derived.
157 */
158 again: c = *fmt++;
159 switch (c) {
160 case '%':
161 literal:
162 if (fp->_r <= 0 && __srefill(fp))
163 goto input_failure;
164 if (*fp->_p != c)
165 goto match_failure;
166 fp->_r--, fp->_p++;
167 nread++;
168 continue;
169
170 case '*':
171 flags |= SUPPRESS;
172 goto again;
173 case 'L':
174 flags |= LONGDBL;
175 goto again;
176 case 'h':
177 flags |= SHORT;
178 goto again;
179 case 'l':
180 if (*fmt == 'l') {
181 fmt++;
182 flags |= QUAD;
183 } else {
184 flags |= LONG;
185 }
186 goto again;
187 case 'q':
188 flags |= QUAD;
189 goto again;
190
191 case '0': case '1': case '2': case '3': case '4':
192 case '5': case '6': case '7': case '8': case '9':
193 width = width * 10 + c - '0';
194 goto again;
195
196 /*
197 * Conversions.
198 * Those marked `compat' are for 4.[123]BSD compatibility.
199 *
200 * (According to ANSI, E and X formats are supposed
201 * to the same as e and x. Sorry about that.)
202 */
203 case 'D': /* compat */
204 flags |= LONG;
205 /* FALLTHROUGH */
206 case 'd':
207 c = CT_INT;
208 ccfn = (u_quad_t (*) __P((const char *, char **, int)))strtoq;
209 base = 10;
210 break;
211
212 case 'i':
213 c = CT_INT;
214 ccfn = (u_quad_t (*) __P((const char *, char **, int)))strtoq;
215 base = 0;
216 break;
217
218 case 'O': /* compat */
219 flags |= LONG;
220 /* FALLTHROUGH */
221 case 'o':
222 c = CT_INT;
223 ccfn = strtouq;
224 base = 8;
225 break;
226
227 case 'u':
228 c = CT_INT;
229 ccfn = strtouq;
230 base = 10;
231 break;
232
233 case 'X':
234 case 'x':
235 flags |= PFXOK; /* enable 0x prefixing */
236 c = CT_INT;
237 ccfn = strtouq;
238 base = 16;
239 break;
240
241 #ifdef FLOATING_POINT
242 case 'E':
243 case 'G':
244 case 'e':
245 case 'f':
246 case 'g':
247 c = CT_FLOAT;
248 break;
249 #endif
250
251 case 's':
252 c = CT_STRING;
253 break;
254
255 case '[':
256 fmt = __sccl(ccltab, fmt);
257 flags |= NOSKIP;
258 c = CT_CCL;
259 break;
260
261 case 'c':
262 flags |= NOSKIP;
263 c = CT_CHAR;
264 break;
265
266 case 'p': /* pointer format is like hex */
267 flags |= POINTER | PFXOK;
268 c = CT_INT;
269 ccfn = strtouq;
270 base = 16;
271 break;
272
273 case 'n':
274 if (flags & SUPPRESS) /* ??? */
275 continue;
276 if (flags & SHORT)
277 *va_arg(ap, short *) = nread;
278 else if (flags & LONG)
279 *va_arg(ap, long *) = nread;
280 else
281 *va_arg(ap, int *) = nread;
282 continue;
283
284 /*
285 * Disgusting backwards compatibility hacks. XXX
286 */
287 case '\0': /* compat */
288 return (EOF);
289
290 default: /* compat */
291 if (isupper(c))
292 flags |= LONG;
293 c = CT_INT;
294 ccfn = (u_quad_t (*) __P((const char *, char **, int)))strtoq;
295 base = 10;
296 break;
297 }
298
299 /*
300 * We have a conversion that requires input.
301 */
302 if (fp->_r <= 0 && __srefill(fp))
303 goto input_failure;
304
305 /*
306 * Consume leading white space, except for formats
307 * that suppress this.
308 */
309 if ((flags & NOSKIP) == 0) {
310 while (isspace(*fp->_p)) {
311 nread++;
312 if (--fp->_r > 0)
313 fp->_p++;
314 else if (__srefill(fp))
315 goto input_failure;
316 }
317 /*
318 * Note that there is at least one character in
319 * the buffer, so conversions that do not set NOSKIP
320 * ca no longer result in an input failure.
321 */
322 }
323
324 /*
325 * Do the conversion.
326 */
327 switch (c) {
328
329 case CT_CHAR:
330 /* scan arbitrary characters (sets NOSKIP) */
331 if (width == 0)
332 width = 1;
333 if (flags & SUPPRESS) {
334 size_t sum = 0;
335 for (;;) {
336 if ((n = fp->_r) < width) {
337 sum += n;
338 width -= n;
339 fp->_p += n;
340 if (__srefill(fp)) {
341 if (sum == 0)
342 goto input_failure;
343 break;
344 }
345 } else {
346 sum += width;
347 fp->_r -= width;
348 fp->_p += width;
349 break;
350 }
351 }
352 nread += sum;
353 } else {
354 size_t r = fread((void *)va_arg(ap, char *), 1,
355 width, fp);
356
357 if (r == 0)
358 goto input_failure;
359 nread += r;
360 nassigned++;
361 }
362 break;
363
364 case CT_CCL:
365 /* scan a (nonempty) character class (sets NOSKIP) */
366 if (width == 0)
367 width = ~0; /* `infinity' */
368 /* take only those things in the class */
369 if (flags & SUPPRESS) {
370 n = 0;
371 while (ccltab[*fp->_p]) {
372 n++, fp->_r--, fp->_p++;
373 if (--width == 0)
374 break;
375 if (fp->_r <= 0 && __srefill(fp)) {
376 if (n == 0)
377 goto input_failure;
378 break;
379 }
380 }
381 if (n == 0)
382 goto match_failure;
383 } else {
384 p0 = p = va_arg(ap, char *);
385 while (ccltab[*fp->_p]) {
386 fp->_r--;
387 *p++ = *fp->_p++;
388 if (--width == 0)
389 break;
390 if (fp->_r <= 0 && __srefill(fp)) {
391 if (p == p0)
392 goto input_failure;
393 break;
394 }
395 }
396 n = p - p0;
397 if (n == 0)
398 goto match_failure;
399 *p = 0;
400 nassigned++;
401 }
402 nread += n;
403 break;
404
405 case CT_STRING:
406 /* like CCL, but zero-length string OK, & no NOSKIP */
407 if (width == 0)
408 width = ~0;
409 if (flags & SUPPRESS) {
410 n = 0;
411 while (!isspace(*fp->_p)) {
412 n++, fp->_r--, fp->_p++;
413 if (--width == 0)
414 break;
415 if (fp->_r <= 0 && __srefill(fp))
416 break;
417 }
418 nread += n;
419 } else {
420 p0 = p = va_arg(ap, char *);
421 while (!isspace(*fp->_p)) {
422 fp->_r--;
423 *p++ = *fp->_p++;
424 if (--width == 0)
425 break;
426 if (fp->_r <= 0 && __srefill(fp))
427 break;
428 }
429 *p = 0;
430 nread += p - p0;
431 nassigned++;
432 }
433 continue;
434
435 case CT_INT:
436 /* scan an integer as if by strtoq/strtouq */
437 #ifdef hardway
438 if (width == 0 || width > sizeof(buf) - 1)
439 width = sizeof(buf) - 1;
440 #else
441 /* size_t is unsigned, hence this optimisation */
442 if (--width > sizeof(buf) - 2)
443 width = sizeof(buf) - 2;
444 width++;
445 #endif
446 flags |= SIGNOK | NDIGITS | NZDIGITS;
447 for (p = buf; width; width--) {
448 c = *fp->_p;
449 /*
450 * Switch on the character; `goto ok'
451 * if we accept it as a part of number.
452 */
453 switch (c) {
454
455 /*
456 * The digit 0 is always legal, but is
457 * special. For %i conversions, if no
458 * digits (zero or nonzero) have been
459 * scanned (only signs), we will have
460 * base==0. In that case, we should set
461 * it to 8 and enable 0x prefixing.
462 * Also, if we have not scanned zero digits
463 * before this, do not turn off prefixing
464 * (someone else will turn it off if we
465 * have scanned any nonzero digits).
466 */
467 case '0':
468 if (base == 0) {
469 base = 8;
470 flags |= PFXOK;
471 }
472 if (flags & NZDIGITS)
473 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
474 else
475 flags &= ~(SIGNOK|PFXOK|NDIGITS);
476 goto ok;
477
478 /* 1 through 7 always legal */
479 case '1': case '2': case '3':
480 case '4': case '5': case '6': case '7':
481 base = basefix[base];
482 flags &= ~(SIGNOK | PFXOK | NDIGITS);
483 goto ok;
484
485 /* digits 8 and 9 ok iff decimal or hex */
486 case '8': case '9':
487 base = basefix[base];
488 if (base <= 8)
489 break; /* not legal here */
490 flags &= ~(SIGNOK | PFXOK | NDIGITS);
491 goto ok;
492
493 /* letters ok iff hex */
494 case 'A': case 'B': case 'C':
495 case 'D': case 'E': case 'F':
496 case 'a': case 'b': case 'c':
497 case 'd': case 'e': case 'f':
498 /* no need to fix base here */
499 if (base <= 10)
500 break; /* not legal here */
501 flags &= ~(SIGNOK | PFXOK | NDIGITS);
502 goto ok;
503
504 /* sign ok only as first character */
505 case '+': case '-':
506 if (flags & SIGNOK) {
507 flags &= ~SIGNOK;
508 goto ok;
509 }
510 break;
511
512 /* x ok iff flag still set & 2nd char */
513 case 'x': case 'X':
514 if (flags & PFXOK && p == buf + 1) {
515 base = 16; /* if %i */
516 flags &= ~PFXOK;
517 goto ok;
518 }
519 break;
520 }
521
522 /*
523 * If we got here, c is not a legal character
524 * for a number. Stop accumulating digits.
525 */
526 break;
527 ok:
528 /*
529 * c is legal: store it and look at the next.
530 */
531 *p++ = c;
532 if (--fp->_r > 0)
533 fp->_p++;
534 else if (__srefill(fp))
535 break; /* EOF */
536 }
537 /*
538 * If we had only a sign, it is no good; push
539 * back the sign. If the number ends in `x',
540 * it was [sign] '0' 'x', so push back the x
541 * and treat it as [sign] '0'.
542 */
543 if (flags & NDIGITS) {
544 if (p > buf)
545 (void) ungetc(*(u_char *)--p, fp);
546 goto match_failure;
547 }
548 c = ((u_char *)p)[-1];
549 if (c == 'x' || c == 'X') {
550 --p;
551 (void) ungetc(c, fp);
552 }
553 if ((flags & SUPPRESS) == 0) {
554 u_quad_t res;
555
556 *p = 0;
557 res = (*ccfn)(buf, (char **)NULL, base);
558 if (flags & POINTER)
559 *va_arg(ap, void **) =
560 (void *)(long)res;
561 else if (flags & QUAD)
562 *va_arg(ap, quad_t *) = res;
563 else if (flags & LONG)
564 *va_arg(ap, long *) = res;
565 else if (flags & SHORT)
566 *va_arg(ap, short *) = res;
567 else
568 *va_arg(ap, int *) = res;
569 nassigned++;
570 }
571 nread += p - buf;
572 break;
573
574 #ifdef FLOATING_POINT
575 case CT_FLOAT:
576 /* scan a floating point number as if by strtod */
577 #ifdef hardway
578 if (width == 0 || width > sizeof(buf) - 1)
579 width = sizeof(buf) - 1;
580 #else
581 /* size_t is unsigned, hence this optimisation */
582 if (--width > sizeof(buf) - 2)
583 width = sizeof(buf) - 2;
584 width++;
585 #endif
586 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
587 for (p = buf; width; width--) {
588 c = *fp->_p;
589 /*
590 * This code mimicks the integer conversion
591 * code, but is much simpler.
592 */
593 switch (c) {
594
595 case '0': case '1': case '2': case '3':
596 case '4': case '5': case '6': case '7':
597 case '8': case '9':
598 flags &= ~(SIGNOK | NDIGITS);
599 goto fok;
600
601 case '+': case '-':
602 if (flags & SIGNOK) {
603 flags &= ~SIGNOK;
604 goto fok;
605 }
606 break;
607 case '.':
608 if (flags & DPTOK) {
609 flags &= ~(SIGNOK | DPTOK);
610 goto fok;
611 }
612 break;
613 case 'e': case 'E':
614 /* no exponent without some digits */
615 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
616 flags =
617 (flags & ~(EXPOK|DPTOK)) |
618 SIGNOK | NDIGITS;
619 goto fok;
620 }
621 break;
622 }
623 break;
624 fok:
625 *p++ = c;
626 if (--fp->_r > 0)
627 fp->_p++;
628 else if (__srefill(fp))
629 break; /* EOF */
630 }
631 /*
632 * If no digits, might be missing exponent digits
633 * (just give back the exponent) or might be missing
634 * regular digits, but had sign and/or decimal point.
635 */
636 if (flags & NDIGITS) {
637 if (flags & EXPOK) {
638 /* no digits at all */
639 while (p > buf)
640 ungetc(*(u_char *)--p, fp);
641 goto match_failure;
642 }
643 /* just a bad exponent (e and maybe sign) */
644 c = *(u_char *)--p;
645 if (c != 'e' && c != 'E') {
646 (void) ungetc(c, fp);/* sign */
647 c = *(u_char *)--p;
648 }
649 (void) ungetc(c, fp);
650 }
651 if ((flags & SUPPRESS) == 0) {
652 double res;
653
654 *p = 0;
655 res = strtod(buf, (char **) NULL);
656 if (flags & LONGDBL)
657 *va_arg(ap, long double *) = res;
658 else if (flags & LONG)
659 *va_arg(ap, double *) = res;
660 else
661 *va_arg(ap, float *) = res;
662 nassigned++;
663 }
664 nread += p - buf;
665 break;
666 #endif /* FLOATING_POINT */
667 }
668 }
669 input_failure:
670 return (nassigned ? nassigned : -1);
671 match_failure:
672 return (nassigned);
673 }
674
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL when the format ends before
 * a closing `]' is seen.  The table must have 256 entries, indexed by
 * character value; on return it holds a 1 wherever characters should
 * be considered part of the scanset and a 0 everywhere else.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	/* should probably use memset here */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': were an equal end
			 * point allowed through, the loop below would mark
			 * c + 1 instead, which for c == 255 lies outside
			 * the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range; n > c here */
				tab[++c] = v;
			} while (c < n);
			/*
			 * XXX another disgusting compatibility hack:
			 * alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 * c now equals the range end, so a following `-'
			 * simply extends the range.
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
772