vfscanf.c revision 1.8 1 /*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 /*static char *sccsid = "from: @(#)vfscanf.c 5.7 (Berkeley) 12/14/92";*/
39 static char *rcsid = "$Id: vfscanf.c,v 1.8 1994/09/19 04:43:05 mycroft Exp $";
40 #endif /* LIBC_SCCS and not lint */
41
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <ctype.h>
45 #if __STDC__
46 #include <stdarg.h>
47 #else
48 #include <varargs.h>
49 #endif
50 #include "local.h"
51
52 #ifdef FLOATING_POINT
53 #include "floatio.h"
54 #endif
55
/* Longest numeric string (including the terminating NUL) we will buffer. */
#define	BUF		513

/*
 * Per-conversion flag bits, accumulated while a `%' directive is parsed.
 */
#define	LONG		0x01	/* `l' modifier seen (long or double) */
#define	LONGDBL		0x02	/* `L' modifier seen (long double) */
#define	SHORT		0x04	/* `h' modifier seen (short) */
#define	SUPPRESS	0x08	/* `*' seen: convert but do not assign */
#define	POINTER		0x10	/* %p: pointer scanned as `fake hex' */
#define	NOSKIP		0x20	/* leading blanks are not skipped */

/*
 * Scanner state bits, meaningful only while a number is being collected.
 * Floating point uses SIGNOK, NDIGITS, DPTOK and EXPOK; integers use
 * SIGNOK, NDIGITS, PFXOK and NZDIGITS.  The two private pairs share
 * bit values because a conversion is never both kinds at once.
 */
#define	SIGNOK		0x40	/* a +/- sign may still appear */
#define	NDIGITS		0x80	/* no digit has been seen yet */

#define	DPTOK		0x100	/* (float) a decimal point may still appear */
#define	EXPOK		0x200	/* (float) an exponent may still appear */

#define	PFXOK		0x100	/* (int) a 0x prefix may still appear */
#define	NZDIGITS	0x200	/* (int) no zero digit has been seen yet */

/*
 * Kinds of conversion, selected by the format and dispatched on later.
 */
#define	CT_CHAR		0	/* %c */
#define	CT_CCL		1	/* %[...] */
#define	CT_STRING	2	/* %s */
#define	CT_INT		3	/* integral: strtol or strtoul */
#define	CT_FLOAT	4	/* floating: strtod */

/* Local shorthands for the unsigned types used throughout this file. */
#define	u_char	unsigned char
#define	u_long	unsigned long
93
94 static u_char *__sccl();
95
96 /*
97 * vfscanf
98 */
99 __svfscanf(fp, fmt0, ap)
100 register FILE *fp;
101 char const *fmt0;
102 _BSD_VA_LIST_ ap;
103 {
104 register u_char *fmt = (u_char *)fmt0;
105 register int c; /* character from format, or conversion */
106 register size_t width; /* field width, or 0 */
107 register char *p; /* points into all kinds of strings */
108 register int n; /* handy integer */
109 register int flags; /* flags as defined above */
110 register char *p0; /* saves original value of p when necessary */
111 int nassigned; /* number of fields assigned */
112 int nread; /* number of characters consumed from fp */
113 int base; /* base argument to strtol/strtoul */
114 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */
115 char ccltab[256]; /* character class table for %[...] */
116 char buf[BUF]; /* buffer for numeric conversions */
117
118 /* `basefix' is used to avoid `if' tests in the integer scanner */
119 static short basefix[17] =
120 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
121
122 nassigned = 0;
123 nread = 0;
124 base = 0; /* XXX just to keep gcc happy */
125 ccfn = NULL; /* XXX just to keep gcc happy */
126 for (;;) {
127 c = *fmt++;
128 if (c == 0)
129 return (nassigned);
130 if (isspace(c)) {
131 for (;;) {
132 if (fp->_r <= 0 && __srefill(fp))
133 return (nassigned);
134 if (!isspace(*fp->_p))
135 break;
136 nread++, fp->_r--, fp->_p++;
137 }
138 continue;
139 }
140 if (c != '%')
141 goto literal;
142 width = 0;
143 flags = 0;
144 /*
145 * switch on the format. continue if done;
146 * break once format type is derived.
147 */
148 again: c = *fmt++;
149 switch (c) {
150 case '%':
151 literal:
152 if (fp->_r <= 0 && __srefill(fp))
153 goto input_failure;
154 if (*fp->_p != c)
155 goto match_failure;
156 fp->_r--, fp->_p++;
157 nread++;
158 continue;
159
160 case '*':
161 flags |= SUPPRESS;
162 goto again;
163 case 'l':
164 flags |= LONG;
165 goto again;
166 case 'L':
167 flags |= LONGDBL;
168 goto again;
169 case 'h':
170 flags |= SHORT;
171 goto again;
172
173 case '0': case '1': case '2': case '3': case '4':
174 case '5': case '6': case '7': case '8': case '9':
175 width = width * 10 + c - '0';
176 goto again;
177
178 /*
179 * Conversions.
180 * Those marked `compat' are for 4.[123]BSD compatibility.
181 *
182 * (According to ANSI, E and X formats are supposed
183 * to the same as e and x. Sorry about that.)
184 */
185 case 'D': /* compat */
186 flags |= LONG;
187 /* FALLTHROUGH */
188 case 'd':
189 c = CT_INT;
190 ccfn = (u_long (*)())strtol;
191 base = 10;
192 break;
193
194 case 'i':
195 c = CT_INT;
196 ccfn = (u_long (*)())strtol;
197 base = 0;
198 break;
199
200 case 'O': /* compat */
201 flags |= LONG;
202 /* FALLTHROUGH */
203 case 'o':
204 c = CT_INT;
205 ccfn = strtoul;
206 base = 8;
207 break;
208
209 case 'u':
210 c = CT_INT;
211 ccfn = strtoul;
212 base = 10;
213 break;
214
215 case 'X': /* compat XXX */
216 flags |= LONG;
217 /* FALLTHROUGH */
218 case 'x':
219 flags |= PFXOK; /* enable 0x prefixing */
220 c = CT_INT;
221 ccfn = strtoul;
222 base = 16;
223 break;
224
225 #ifdef FLOATING_POINT
226 case 'E': /* compat XXX */
227 case 'F': /* compat */
228 flags |= LONG;
229 /* FALLTHROUGH */
230 case 'e': case 'f': case 'g':
231 c = CT_FLOAT;
232 break;
233 #endif
234
235 case 's':
236 c = CT_STRING;
237 break;
238
239 case '[':
240 fmt = __sccl(ccltab, fmt);
241 flags |= NOSKIP;
242 c = CT_CCL;
243 break;
244
245 case 'c':
246 flags |= NOSKIP;
247 c = CT_CHAR;
248 break;
249
250 case 'p': /* pointer format is like hex */
251 flags |= POINTER | PFXOK;
252 c = CT_INT;
253 ccfn = strtoul;
254 base = 16;
255 break;
256
257 case 'n':
258 if (flags & SUPPRESS) /* ??? */
259 continue;
260 if (flags & SHORT)
261 *va_arg(ap, short *) = nread;
262 else if (flags & LONG)
263 *va_arg(ap, long *) = nread;
264 else
265 *va_arg(ap, int *) = nread;
266 continue;
267
268 /*
269 * Disgusting backwards compatibility hacks. XXX
270 */
271 case '\0': /* compat */
272 return (EOF);
273
274 default: /* compat */
275 if (isupper(c))
276 flags |= LONG;
277 c = CT_INT;
278 ccfn = (u_long (*)())strtol;
279 base = 10;
280 break;
281 }
282
283 /*
284 * We have a conversion that requires input.
285 */
286 if (fp->_r <= 0 && __srefill(fp))
287 goto input_failure;
288
289 /*
290 * Consume leading white space, except for formats
291 * that suppress this.
292 */
293 if ((flags & NOSKIP) == 0) {
294 while (isspace(*fp->_p)) {
295 nread++;
296 if (--fp->_r > 0)
297 fp->_p++;
298 else if (__srefill(fp))
299 goto input_failure;
300 }
301 /*
302 * Note that there is at least one character in
303 * the buffer, so conversions that do not set NOSKIP
304 * ca no longer result in an input failure.
305 */
306 }
307
308 /*
309 * Do the conversion.
310 */
311 switch (c) {
312
313 case CT_CHAR:
314 /* scan arbitrary characters (sets NOSKIP) */
315 if (width == 0)
316 width = 1;
317 if (flags & SUPPRESS) {
318 size_t sum = 0;
319 for (;;) {
320 if ((n = fp->_r) < width) {
321 sum += n;
322 width -= n;
323 fp->_p += n;
324 if (__srefill(fp)) {
325 if (sum == 0)
326 goto input_failure;
327 break;
328 }
329 } else {
330 sum += width;
331 fp->_r -= width;
332 fp->_p += width;
333 break;
334 }
335 }
336 nread += sum;
337 } else {
338 size_t r = fread((void *)va_arg(ap, char *), 1,
339 width, fp);
340
341 if (r == 0)
342 goto input_failure;
343 nread += r;
344 nassigned++;
345 }
346 break;
347
348 case CT_CCL:
349 /* scan a (nonempty) character class (sets NOSKIP) */
350 if (width == 0)
351 width = ~0; /* `infinity' */
352 /* take only those things in the class */
353 if (flags & SUPPRESS) {
354 n = 0;
355 while (ccltab[*fp->_p]) {
356 n++, fp->_r--, fp->_p++;
357 if (--width == 0)
358 break;
359 if (fp->_r <= 0 && __srefill(fp)) {
360 if (n == 0)
361 goto input_failure;
362 break;
363 }
364 }
365 if (n == 0)
366 goto match_failure;
367 } else {
368 p0 = p = va_arg(ap, char *);
369 while (ccltab[*fp->_p]) {
370 fp->_r--;
371 *p++ = *fp->_p++;
372 if (--width == 0)
373 break;
374 if (fp->_r <= 0 && __srefill(fp)) {
375 if (p == p0)
376 goto input_failure;
377 break;
378 }
379 }
380 n = p - p0;
381 if (n == 0)
382 goto match_failure;
383 *p = 0;
384 nassigned++;
385 }
386 nread += n;
387 break;
388
389 case CT_STRING:
390 /* like CCL, but zero-length string OK, & no NOSKIP */
391 if (width == 0)
392 width = ~0;
393 if (flags & SUPPRESS) {
394 n = 0;
395 while (!isspace(*fp->_p)) {
396 n++, fp->_r--, fp->_p++;
397 if (--width == 0)
398 break;
399 if (fp->_r <= 0 && __srefill(fp))
400 break;
401 }
402 nread += n;
403 } else {
404 p0 = p = va_arg(ap, char *);
405 while (!isspace(*fp->_p)) {
406 fp->_r--;
407 *p++ = *fp->_p++;
408 if (--width == 0)
409 break;
410 if (fp->_r <= 0 && __srefill(fp))
411 break;
412 }
413 *p = 0;
414 nread += p - p0;
415 nassigned++;
416 }
417 continue;
418
419 case CT_INT:
420 /* scan an integer as if by strtol/strtoul */
421 #ifdef hardway
422 if (width == 0 || width > sizeof(buf) - 1)
423 width = sizeof(buf) - 1;
424 #else
425 /* size_t is unsigned, hence this optimisation */
426 if (--width > sizeof(buf) - 2)
427 width = sizeof(buf) - 2;
428 width++;
429 #endif
430 flags |= SIGNOK | NDIGITS | NZDIGITS;
431 for (p = buf; width; width--) {
432 c = *fp->_p;
433 /*
434 * Switch on the character; `goto ok'
435 * if we accept it as a part of number.
436 */
437 switch (c) {
438
439 /*
440 * The digit 0 is always legal, but is
441 * special. For %i conversions, if no
442 * digits (zero or nonzero) have been
443 * scanned (only signs), we will have
444 * base==0. In that case, we should set
445 * it to 8 and enable 0x prefixing.
446 * Also, if we have not scanned zero digits
447 * before this, do not turn off prefixing
448 * (someone else will turn it off if we
449 * have scanned any nonzero digits).
450 */
451 case '0':
452 if (base == 0) {
453 base = 8;
454 flags |= PFXOK;
455 }
456 if (flags & NZDIGITS)
457 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
458 else
459 flags &= ~(SIGNOK|PFXOK|NDIGITS);
460 goto ok;
461
462 /* 1 through 7 always legal */
463 case '1': case '2': case '3':
464 case '4': case '5': case '6': case '7':
465 base = basefix[base];
466 flags &= ~(SIGNOK | PFXOK | NDIGITS);
467 goto ok;
468
469 /* digits 8 and 9 ok iff decimal or hex */
470 case '8': case '9':
471 base = basefix[base];
472 if (base <= 8)
473 break; /* not legal here */
474 flags &= ~(SIGNOK | PFXOK | NDIGITS);
475 goto ok;
476
477 /* letters ok iff hex */
478 case 'A': case 'B': case 'C':
479 case 'D': case 'E': case 'F':
480 case 'a': case 'b': case 'c':
481 case 'd': case 'e': case 'f':
482 /* no need to fix base here */
483 if (base <= 10)
484 break; /* not legal here */
485 flags &= ~(SIGNOK | PFXOK | NDIGITS);
486 goto ok;
487
488 /* sign ok only as first character */
489 case '+': case '-':
490 if (flags & SIGNOK) {
491 flags &= ~SIGNOK;
492 goto ok;
493 }
494 break;
495
496 /* x ok iff flag still set & 2nd char */
497 case 'x': case 'X':
498 if (flags & PFXOK && p == buf + 1) {
499 base = 16; /* if %i */
500 flags &= ~PFXOK;
501 goto ok;
502 }
503 break;
504 }
505
506 /*
507 * If we got here, c is not a legal character
508 * for a number. Stop accumulating digits.
509 */
510 break;
511 ok:
512 /*
513 * c is legal: store it and look at the next.
514 */
515 *p++ = c;
516 if (--fp->_r > 0)
517 fp->_p++;
518 else if (__srefill(fp))
519 break; /* EOF */
520 }
521 /*
522 * If we had only a sign, it is no good; push
523 * back the sign. If the number ends in `x',
524 * it was [sign] '0' 'x', so push back the x
525 * and treat it as [sign] '0'.
526 */
527 if (flags & NDIGITS) {
528 if (p > buf)
529 (void) ungetc(*(u_char *)--p, fp);
530 goto match_failure;
531 }
532 c = ((u_char *)p)[-1];
533 if (c == 'x' || c == 'X') {
534 --p;
535 (void) ungetc(c, fp);
536 }
537 if ((flags & SUPPRESS) == 0) {
538 u_long res;
539
540 *p = 0;
541 res = (*ccfn)(buf, (char **)NULL, base);
542 if (flags & POINTER)
543 *va_arg(ap, void **) = (void *)res;
544 else if (flags & SHORT)
545 *va_arg(ap, short *) = res;
546 else if (flags & LONG)
547 *va_arg(ap, long *) = res;
548 else
549 *va_arg(ap, int *) = res;
550 nassigned++;
551 }
552 nread += p - buf;
553 break;
554
555 #ifdef FLOATING_POINT
556 case CT_FLOAT:
557 /* scan a floating point number as if by strtod */
558 #ifdef hardway
559 if (width == 0 || width > sizeof(buf) - 1)
560 width = sizeof(buf) - 1;
561 #else
562 /* size_t is unsigned, hence this optimisation */
563 if (--width > sizeof(buf) - 2)
564 width = sizeof(buf) - 2;
565 width++;
566 #endif
567 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
568 for (p = buf; width; width--) {
569 c = *fp->_p;
570 /*
571 * This code mimicks the integer conversion
572 * code, but is much simpler.
573 */
574 switch (c) {
575
576 case '0': case '1': case '2': case '3':
577 case '4': case '5': case '6': case '7':
578 case '8': case '9':
579 flags &= ~(SIGNOK | NDIGITS);
580 goto fok;
581
582 case '+': case '-':
583 if (flags & SIGNOK) {
584 flags &= ~SIGNOK;
585 goto fok;
586 }
587 break;
588 case '.':
589 if (flags & DPTOK) {
590 flags &= ~(SIGNOK | DPTOK);
591 goto fok;
592 }
593 break;
594 case 'e': case 'E':
595 /* no exponent without some digits */
596 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
597 flags =
598 (flags & ~(EXPOK|DPTOK)) |
599 SIGNOK | NDIGITS;
600 goto fok;
601 }
602 break;
603 }
604 break;
605 fok:
606 *p++ = c;
607 if (--fp->_r > 0)
608 fp->_p++;
609 else if (__srefill(fp))
610 break; /* EOF */
611 }
612 /*
613 * If no digits, might be missing exponent digits
614 * (just give back the exponent) or might be missing
615 * regular digits, but had sign and/or decimal point.
616 */
617 if (flags & NDIGITS) {
618 if (flags & EXPOK) {
619 /* no digits at all */
620 while (p > buf)
621 ungetc(*(u_char *)--p, fp);
622 goto match_failure;
623 }
624 /* just a bad exponent (e and maybe sign) */
625 c = *(u_char *)--p;
626 if (c != 'e' && c != 'E') {
627 (void) ungetc(c, fp);/* sign */
628 c = *(u_char *)--p;
629 }
630 (void) ungetc(c, fp);
631 }
632 if ((flags & SUPPRESS) == 0) {
633 double res;
634
635 *p = 0;
636 res = strtod(buf, (char **) NULL);
637 if (flags & LONG)
638 *va_arg(ap, double *) = res;
639 else
640 *va_arg(ap, float *) = res;
641 nassigned++;
642 }
643 nread += p - buf;
644 break;
645 #endif /* FLOATING_POINT */
646 }
647 }
648 input_failure:
649 return (nassigned ? nassigned : -1);
650 match_failure:
651 return (nassigned);
652 }
653
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * a closing `]' is seen.  The table must have room for 256 entries;
 * on return it holds a 1 wherever characters should be considered
 * part of the scanset and a 0 everywhere else.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/*
			 * Fill in the rest of the range.  tab[c] is already
			 * set; testing before each store guarantees that
			 * nothing beyond tab[n] is ever written.
			 */
			while (c < n)
				tab[++c] = v;
#if 1	/* XXX another disgusting compatibility hack */
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return (fmt - 1);
			if (c == ']')
				return (fmt);
#endif
			break;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
751