vfscanf.c revision 1.15.2.1 1 /* $NetBSD: vfscanf.c,v 1.15.2.1 1996/09/16 18:40:58 jtc Exp $ */
2
3 /*-
4 * Copyright (c) 1990, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Torek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #if defined(LIBC_SCCS) && !defined(lint)
40 #if 0
41 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93";
42 #endif
43 static char rcsid[] = "$NetBSD: vfscanf.c,v 1.15.2.1 1996/09/16 18:40:58 jtc Exp $";
44 #endif /* LIBC_SCCS and not lint */
45
46 #include "namespace.h"
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <ctype.h>
50 #if __STDC__
51 #include <stdarg.h>
52 #else
53 #include <varargs.h>
54 #endif
55 #include "local.h"
56
57 #ifdef FLOATING_POINT
58 #include "floatio.h"
59 #endif
60
/*
 * BUF is the size of the scratch array that collects the text of one
 * numeric field before it is handed to the conversion routine; field
 * widths for numeric conversions are clamped to BUF - 1 so that the
 * terminating NUL always fits.
 */
61 #define BUF 513 /* Maximum length of numeric string. */
62
63 /*
64 * Flags used during conversion.
 * These are OR-ed into a single `flags' word while a conversion
 * specification is being parsed and carried out.
65 */
66 #define LONG 0x01 /* l: long or double */
67 #define LONGDBL 0x02 /* L: long double; value is still parsed by strtod as a double */
68 #define SHORT 0x04 /* h: short */
69 #define QUAD 0x08 /* q: quad */
70 #define SUPPRESS 0x10 /* suppress assignment */
71 #define POINTER 0x20 /* weird %p pointer (`fake hex') */
72 #define NOSKIP 0x40 /* do not skip blanks */
73
74 /*
75 * The following are used in numeric conversions only:
76 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
77 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/PFXOK and EXPOK/NZDIGITS deliberately share bit values: a
 * given conversion is either floating or integral, so the two groups
 * are never in use at the same time.
78 */
79 #define SIGNOK 0x080 /* +/- is (still) legal */
80 #define NDIGITS 0x100 /* no digits detected */
81
82 #define DPTOK 0x200 /* (float) decimal point is still legal */
83 #define EXPOK 0x400 /* (float) exponent (e+3, etc) still legal */
84
85 #define PFXOK 0x200 /* 0x prefix is (still) legal */
86 #define NZDIGITS 0x400 /* no zero digits detected */
87
88 /*
89 * Conversion types.
 * After the format switch has classified a conversion, one of these
 * values selects the code that actually consumes the input.
90 */
91 #define CT_CHAR 0 /* %c conversion */
92 #define CT_CCL 1 /* %[...] conversion */
93 #define CT_STRING 2 /* %s conversion */
94 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */
95 #define CT_FLOAT 4 /* floating, i.e., strtod */
96
/* Local shorthands for the unsigned types used below. */
97 #define u_char unsigned char
98 #define u_long unsigned long
99
/*
 * Forward declaration (old-style, no prototype) of the scanset table
 * builder used for %[...] conversions.
 */
100 static u_char *__sccl();
101
102 /*
103 * vfscanf
 *
 * __svfscanf is the scanning engine: it walks the format string fmt0,
 * consumes matching input from fp, and stores each converted value
 * through the next pointer fetched from ap.
 *
 * Returns the number of fields assigned.  When input runs out while
 * matching a literal or performing a conversion and nothing has been
 * assigned yet, it returns -1.  A `%' that ends the format returns EOF
 * (compat).  On a match failure it returns the count assigned so far.
104 */
105 int
106 __svfscanf(fp, fmt0, ap)
107 register FILE *fp;
108 char const *fmt0;
109 _BSD_VA_LIST_ ap;
110 {
111 register u_char *fmt = (u_char *)fmt0;
112 register int c; /* character from format, or conversion */
113 register size_t width; /* field width, or 0 */
114 register char *p; /* points into all kinds of strings */
115 register int n; /* handy integer */
116 register int flags; /* flags as defined above */
117 register char *p0; /* saves original value of p when necessary */
118 int nassigned; /* number of fields assigned */
119 int nread; /* number of characters consumed from fp */
120 int base; /* base argument to strtoq/strtouq */
121 u_quad_t (*ccfn)(); /* conversion function (strtoq/strtouq) */
122 char ccltab[256]; /* character class table for %[...] */
123 char buf[BUF]; /* buffer for numeric conversions */
124
125 /* `basefix' is used to avoid `if' tests in the integer scanner */
/*
 * basefix[0] is 10: a base that is still undecided (0, from %i) becomes
 * decimal once a nonzero digit is seen.  Every other entry maps a base
 * to itself.
 */
126 static short basefix[17] =
127 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
128
129 nassigned = 0;
130 nread = 0;
131 base = 0; /* XXX just to keep gcc happy */
132 ccfn = NULL; /* XXX just to keep gcc happy */
/* Main loop: one iteration per format directive. */
133 for (;;) {
134 c = *fmt++;
135 if (c == 0)
136 return (nassigned);
/*
 * White space in the format skips any run of white space in the
 * input.  Running out of input here returns the assignment count
 * directly rather than going through input_failure.
 */
137 if (isspace(c)) {
138 for (;;) {
139 if (fp->_r <= 0 && __srefill(fp))
140 return (nassigned);
141 if (!isspace(*fp->_p))
142 break;
143 nread++, fp->_r--, fp->_p++;
144 }
145 continue;
146 }
/* Any other non-`%' character must match the next input byte exactly. */
147 if (c != '%')
148 goto literal;
149 width = 0;
150 flags = 0;
151 /*
152 * switch on the format. continue if done;
153 * break once format type is derived.
154 */
155 again: c = *fmt++;
156 switch (c) {
157 case '%':
/* `%%' and ordinary format characters share this matching code. */
158 literal:
159 if (fp->_r <= 0 && __srefill(fp))
160 goto input_failure;
161 if (*fp->_p != c)
162 goto match_failure;
163 fp->_r--, fp->_p++;
164 nread++;
165 continue;
166
/* Modifiers: record the flag and go back for the next format character. */
167 case '*':
168 flags |= SUPPRESS;
169 goto again;
170 case 'L':
171 flags |= LONGDBL;
172 goto again;
173 case 'h':
174 flags |= SHORT;
175 goto again;
176 case 'l':
/* `ll' is treated the same as `q' (QUAD); a single `l' is LONG. */
177 if (*fmt == 'l') {
178 fmt++;
179 flags |= QUAD;
180 } else {
181 flags |= LONG;
182 }
183 goto again;
184 case 'q':
185 flags |= QUAD;
186 goto again;
187
/* Decimal digits accumulate into the field width. */
188 case '0': case '1': case '2': case '3': case '4':
189 case '5': case '6': case '7': case '8': case '9':
190 width = width * 10 + c - '0';
191 goto again;
192
193 /*
194 * Conversions.
195 * Those marked `compat' are for 4.[123]BSD compatibility.
196 *
197 * (According to ANSI, E and X formats are supposed
198 * to be the same as e and x. Sorry about that.)
199 */
200 case 'D': /* compat */
201 flags |= LONG;
202 /* FALLTHROUGH */
203 case 'd':
204 c = CT_INT;
205 ccfn = (u_quad_t (*)())strtoq;
206 base = 10;
207 break;
208
209 case 'i':
210 c = CT_INT;
211 ccfn = (u_quad_t (*)())strtoq;
/* base 0: the integer scanner decides between 8, 10 and 16 from the input */
212 base = 0;
213 break;
214
215 case 'O': /* compat */
216 flags |= LONG;
217 /* FALLTHROUGH */
218 case 'o':
219 c = CT_INT;
220 ccfn = strtouq;
221 base = 8;
222 break;
223
224 case 'u':
225 c = CT_INT;
226 ccfn = strtouq;
227 base = 10;
228 break;
229
230 case 'X':
231 case 'x':
232 flags |= PFXOK; /* enable 0x prefixing */
233 c = CT_INT;
234 ccfn = strtouq;
235 base = 16;
236 break;
237
238 #ifdef FLOATING_POINT
239 case 'E':
240 case 'G':
241 case 'e':
242 case 'f':
243 case 'g':
244 c = CT_FLOAT;
245 break;
246 #endif
247
248 case 's':
249 c = CT_STRING;
250 break;
251
252 case '[':
/* __sccl fills ccltab from the scanset and returns the format position after it */
253 fmt = __sccl(ccltab, fmt);
254 flags |= NOSKIP;
255 c = CT_CCL;
256 break;
257
258 case 'c':
259 flags |= NOSKIP;
260 c = CT_CHAR;
261 break;
262
263 case 'p': /* pointer format is like hex */
264 flags |= POINTER | PFXOK;
265 c = CT_INT;
266 ccfn = strtouq;
267 base = 16;
268 break;
269
/*
 * %n stores the number of characters consumed so far; it reads no
 * input and does not count as an assignment.
 * NOTE(review): only SHORT and LONG are examined here, so a QUAD
 * modifier (%qn / %lln) stores through an int * -- confirm intended.
 */
270 case 'n':
271 if (flags & SUPPRESS) /* ??? */
272 continue;
273 if (flags & SHORT)
274 *va_arg(ap, short *) = nread;
275 else if (flags & LONG)
276 *va_arg(ap, long *) = nread;
277 else
278 *va_arg(ap, int *) = nread;
279 continue;
280
281 /*
282 * Disgusting backwards compatibility hacks. XXX
283 */
284 case '\0': /* compat */
285 return (EOF);
286
/* Any unrecognised conversion character is treated as %d (long if upper case). */
287 default: /* compat */
288 if (isupper(c))
289 flags |= LONG;
290 c = CT_INT;
291 ccfn = (u_quad_t (*)())strtoq;
292 base = 10;
293 break;
294 }
295
296 /*
297 * We have a conversion that requires input.
298 */
299 if (fp->_r <= 0 && __srefill(fp))
300 goto input_failure;
301
302 /*
303 * Consume leading white space, except for formats
304 * that suppress this.
305 */
306 if ((flags & NOSKIP) == 0) {
307 while (isspace(*fp->_p)) {
308 nread++;
309 if (--fp->_r > 0)
310 fp->_p++;
311 else if (__srefill(fp))
312 goto input_failure;
313 }
314 /*
315 * Note that there is at least one character in
316 * the buffer, so conversions that do not set NOSKIP
317 * can no longer result in an input failure.
318 */
319 }
320
321 /*
322 * Do the conversion.
323 */
324 switch (c) {
325
326 case CT_CHAR:
327 /* scan arbitrary characters (sets NOSKIP) */
328 if (width == 0)
329 width = 1;
330 if (flags & SUPPRESS) {
/*
 * Discard `width' characters by advancing through the stream
 * buffer, refilling as needed.  `sum' counts what was skipped;
 * hitting end of input before skipping anything is an input
 * failure.
 */
331 size_t sum = 0;
332 for (;;) {
333 if ((n = fp->_r) < width) {
334 sum += n;
335 width -= n;
336 fp->_p += n;
337 if (__srefill(fp)) {
338 if (sum == 0)
339 goto input_failure;
340 break;
341 }
342 } else {
343 sum += width;
344 fp->_r -= width;
345 fp->_p += width;
346 break;
347 }
348 }
349 nread += sum;
350 } else {
/*
 * fread copies straight into the caller's buffer; a short
 * (but nonzero) read still counts as an assignment.  No NUL
 * terminator is stored.
 */
351 size_t r = fread((void *)va_arg(ap, char *), 1,
352 width, fp);
353
354 if (r == 0)
355 goto input_failure;
356 nread += r;
357 nassigned++;
358 }
359 break;
360
361 case CT_CCL:
362 /* scan a (nonempty) character class (sets NOSKIP) */
363 if (width == 0)
364 width = ~0; /* `infinity' */
365 /* take only those things in the class */
366 if (flags & SUPPRESS) {
367 n = 0;
368 while (ccltab[*fp->_p]) {
369 n++, fp->_r--, fp->_p++;
370 if (--width == 0)
371 break;
372 if (fp->_r <= 0 && __srefill(fp)) {
373 if (n == 0)
374 goto input_failure;
375 break;
376 }
377 }
/* nothing in the class at all: the directive did not match */
378 if (n == 0)
379 goto match_failure;
380 } else {
381 p0 = p = va_arg(ap, char *);
382 while (ccltab[*fp->_p]) {
383 fp->_r--;
384 *p++ = *fp->_p++;
385 if (--width == 0)
386 break;
387 if (fp->_r <= 0 && __srefill(fp)) {
388 if (p == p0)
389 goto input_failure;
390 break;
391 }
392 }
393 n = p - p0;
394 if (n == 0)
395 goto match_failure;
/* NUL-terminate the stored string */
396 *p = 0;
397 nassigned++;
398 }
399 nread += n;
400 break;
401
402 case CT_STRING:
403 /* like CCL, but zero-length string OK, & no NOSKIP */
404 if (width == 0)
405 width = ~0;
406 if (flags & SUPPRESS) {
407 n = 0;
408 while (!isspace(*fp->_p)) {
409 n++, fp->_r--, fp->_p++;
410 if (--width == 0)
411 break;
412 if (fp->_r <= 0 && __srefill(fp))
413 break;
414 }
415 nread += n;
416 } else {
417 p0 = p = va_arg(ap, char *);
418 while (!isspace(*fp->_p)) {
419 fp->_r--;
420 *p++ = *fp->_p++;
421 if (--width == 0)
422 break;
423 if (fp->_r <= 0 && __srefill(fp))
424 break;
425 }
426 *p = 0;
427 nread += p - p0;
428 nassigned++;
429 }
/* go on to the next format directive (nothing follows this switch in the loop) */
430 continue;
431
432 case CT_INT:
433 /* scan an integer as if by strtoq/strtouq */
434 #ifdef hardway
435 if (width == 0 || width > sizeof(buf) - 1)
436 width = sizeof(buf) - 1;
437 #else
438 /* size_t is unsigned, hence this optimisation */
/*
 * A width of 0 wraps to the largest size_t when decremented, so a
 * single comparison clamps both `no width given' and `too wide'
 * to sizeof(buf) - 1, leaving room for the terminating NUL.
 */
439 if (--width > sizeof(buf) - 2)
440 width = sizeof(buf) - 2;
441 width++;
442 #endif
443 flags |= SIGNOK | NDIGITS | NZDIGITS;
/* Collect characters that can belong to the number into buf. */
444 for (p = buf; width; width--) {
445 c = *fp->_p;
446 /*
447 * Switch on the character; `goto ok'
448 * if we accept it as a part of number.
449 */
450 switch (c) {
451
452 /*
453 * The digit 0 is always legal, but is
454 * special. For %i conversions, if no
455 * digits (zero or nonzero) have been
456 * scanned (only signs), we will have
457 * base==0. In that case, we should set
458 * it to 8 and enable 0x prefixing.
459 * Also, if we have not scanned zero digits
460 * before this, do not turn off prefixing
461 * (someone else will turn it off if we
462 * have scanned any nonzero digits).
463 */
464 case '0':
465 if (base == 0) {
466 base = 8;
467 flags |= PFXOK;
468 }
469 if (flags & NZDIGITS)
470 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
471 else
472 flags &= ~(SIGNOK|PFXOK|NDIGITS);
473 goto ok;
474
475 /* 1 through 7 always legal */
476 case '1': case '2': case '3':
477 case '4': case '5': case '6': case '7':
478 base = basefix[base];
479 flags &= ~(SIGNOK | PFXOK | NDIGITS);
480 goto ok;
481
482 /* digits 8 and 9 ok iff decimal or hex */
483 case '8': case '9':
484 base = basefix[base];
485 if (base <= 8)
486 break; /* not legal here */
487 flags &= ~(SIGNOK | PFXOK | NDIGITS);
488 goto ok;
489
490 /* letters ok iff hex */
491 case 'A': case 'B': case 'C':
492 case 'D': case 'E': case 'F':
493 case 'a': case 'b': case 'c':
494 case 'd': case 'e': case 'f':
495 /* no need to fix base here */
496 if (base <= 10)
497 break; /* not legal here */
498 flags &= ~(SIGNOK | PFXOK | NDIGITS);
499 goto ok;
500
501 /* sign ok only as first character */
502 case '+': case '-':
503 if (flags & SIGNOK) {
504 flags &= ~SIGNOK;
505 goto ok;
506 }
507 break;
508
509 /* x ok iff flag still set & 2nd char */
510 case 'x': case 'X':
511 if (flags & PFXOK && p == buf + 1) {
512 base = 16; /* if %i */
513 flags &= ~PFXOK;
514 goto ok;
515 }
516 break;
517 }
518
519 /*
520 * If we got here, c is not a legal character
521 * for a number. Stop accumulating digits.
522 */
523 break;
524 ok:
525 /*
526 * c is legal: store it and look at the next.
527 */
528 *p++ = c;
529 if (--fp->_r > 0)
530 fp->_p++;
531 else if (__srefill(fp))
532 break; /* EOF */
533 }
534 /*
535 * If we had only a sign, it is no good; push
536 * back the sign. If the number ends in `x',
537 * it was [sign] '0' 'x', so push back the x
538 * and treat it as [sign] '0'.
539 */
540 if (flags & NDIGITS) {
541 if (p > buf)
542 (void) ungetc(*(u_char *)--p, fp);
543 goto match_failure;
544 }
/* examine the last character that was stored */
545 c = ((u_char *)p)[-1];
546 if (c == 'x' || c == 'X') {
547 --p;
548 (void) ungetc(c, fp);
549 }
550 if ((flags & SUPPRESS) == 0) {
551 u_quad_t res;
552
/* terminate the collected text and convert it with the chosen routine */
553 *p = 0;
554 res = (*ccfn)(buf, (char **)NULL, base);
/* store through a pointer of the size selected by the modifier flags */
555 if (flags & POINTER)
556 *va_arg(ap, void **) =
557 (void *)(long)res;
558 else if (flags & QUAD)
559 *va_arg(ap, quad_t *) = res;
560 else if (flags & LONG)
561 *va_arg(ap, long *) = res;
562 else if (flags & SHORT)
563 *va_arg(ap, short *) = res;
564 else
565 *va_arg(ap, int *) = res;
566 nassigned++;
567 }
568 nread += p - buf;
569 break;
570
571 #ifdef FLOATING_POINT
572 case CT_FLOAT:
573 /* scan a floating point number as if by strtod */
574 #ifdef hardway
575 if (width == 0 || width > sizeof(buf) - 1)
576 width = sizeof(buf) - 1;
577 #else
578 /* size_t is unsigned, hence this optimisation */
/* same clamp as in the integer case: 0 wraps, result is at most sizeof(buf) - 1 */
579 if (--width > sizeof(buf) - 2)
580 width = sizeof(buf) - 2;
581 width++;
582 #endif
583 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
584 for (p = buf; width; width--) {
585 c = *fp->_p;
586 /*
587 * This code mimics the integer conversion
588 * code, but is much simpler.
589 */
590 switch (c) {
591
592 case '0': case '1': case '2': case '3':
593 case '4': case '5': case '6': case '7':
594 case '8': case '9':
595 flags &= ~(SIGNOK | NDIGITS);
596 goto fok;
597
598 case '+': case '-':
599 if (flags & SIGNOK) {
600 flags &= ~SIGNOK;
601 goto fok;
602 }
603 break;
604 case '.':
605 if (flags & DPTOK) {
606 flags &= ~(SIGNOK | DPTOK);
607 goto fok;
608 }
609 break;
610 case 'e': case 'E':
611 /* no exponent without some digits */
/*
 * Accepting the `e' re-arms SIGNOK and NDIGITS so that the
 * exponent may carry its own sign and must have digits.
 */
612 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
613 flags =
614 (flags & ~(EXPOK|DPTOK)) |
615 SIGNOK | NDIGITS;
616 goto fok;
617 }
618 break;
619 }
620 break;
621 fok:
622 *p++ = c;
623 if (--fp->_r > 0)
624 fp->_p++;
625 else if (__srefill(fp))
626 break; /* EOF */
627 }
628 /*
629 * If no digits, might be missing exponent digits
630 * (just give back the exponent) or might be missing
631 * regular digits, but had sign and/or decimal point.
632 */
633 if (flags & NDIGITS) {
634 if (flags & EXPOK) {
635 /* no digits at all */
636 while (p > buf)
637 ungetc(*(u_char *)--p, fp);
638 goto match_failure;
639 }
640 /* just a bad exponent (e and maybe sign) */
641 c = *(u_char *)--p;
642 if (c != 'e' && c != 'E') {
643 (void) ungetc(c, fp);/* sign */
644 c = *(u_char *)--p;
645 }
646 (void) ungetc(c, fp);
647 }
648 if ((flags & SUPPRESS) == 0) {
649 double res;
650
651 *p = 0;
652 res = strtod(buf, (char **) NULL);
/* the value is always parsed as a double, then stored at the requested width */
653 if (flags & LONGDBL)
654 *va_arg(ap, long double *) = res;
655 else if (flags & LONG)
656 *va_arg(ap, double *) = res;
657 else
658 *va_arg(ap, float *) = res;
659 nassigned++;
660 }
661 nread += p - buf;
662 break;
663 #endif /* FLOATING_POINT */
664 }
665 }
/* Ran out of input: -1 if nothing was assigned, otherwise the count so far. */
666 input_failure:
667 return (nassigned ? nassigned : -1);
/* Input did not match the format: report the count so far. */
668 match_failure:
669 return (nassigned);
670 }
671
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * a closing `]' is seen.  The table has a 1 wherever characters
 * should be considered part of the scanset and a 0 elsewhere.
 *
 * tab must have room for 256 entries, one per unsigned char value.
 * A leading `^' negates the set.  The first character after the
 * optional `^' is always taken literally, so it may be `]' or `-'.
 */
static unsigned char *
__sccl(char *tab, unsigned char *fmt)
{
	int c, n, v;

	/* a leading hat means a negated scanset */
	c = *fmt++;
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* preset the whole table to the default */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);	/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=', not `<': with n == c the
			 * fill loop below would still store tab[c + 1],
			 * wrongly adding the next character to the set and,
			 * for c == 255, writing past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range (c < n here) */
				tab[++c] = v;
			} while (c < n);
			/*
			 * Compatibility hack: the V7 Unix scanf also
			 * treats formats such as [a-c-e] as `the letters
			 * a through e', which the standard permits.  So
			 * look at the next character with c still set to
			 * the end of the range just filled.
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
769