printf.c revision 1.54 1 /* $NetBSD: printf.c,v 1.54 2021/05/20 02:01:07 christos Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 #if !defined(BUILTIN) && !defined(SHELL)
35 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
36 The Regents of the University of California. All rights reserved.");
37 #endif
38 #endif
39
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)printf.c 8.2 (Berkeley) 3/22/95";
43 #else
44 __RCSID("$NetBSD: printf.c,v 1.54 2021/05/20 02:01:07 christos Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <sys/types.h>
49
50 #include <ctype.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <inttypes.h>
54 #include <limits.h>
55 #include <locale.h>
56 #include <stdarg.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61
/*
 * ESCAPE: the character produced for the \e and \E escapes (octal 033).
 * It is spelled as the '\e' character constant only when the compiler
 * defines __GNUC__; otherwise the numeric value is used.
 */
#ifndef __GNUC__
#define ESCAPE	033
#else
#define ESCAPE	'\e'
#endif
67
/* Escape-sequence decoding (format string and %b) and %B expansion. */
static void	 conv_escape_str(char *str, void (*do_putchar)(int), int quiet);
static char	*conv_escape(char *str, char *conv_ch, int quiet);
static char	*conv_expand(const char *str);

/* Fetch the next operand from gargv, converted to the wanted type. */
static char	 getchr(void);
static double	 getdouble(void);
static int	 getwidth(void);
static intmax_t	 getintmax(void);
static char	*getstr(void);

/* Conversion helpers and diagnostics. */
static char	*mklong(const char *str, char ch);
static intmax_t	 wide_char(const char *p);
static void	 check_conversion(const char *s, const char *ep);
static void	 usage(void);

/* Callbacks handed to conv_escape_str() by the %b conversion. */
static void	 b_count(int ch);
static void	 b_output(int ch);
static size_t	 b_length;	/* number of bytes seen by b_count() */
static char	*b_fmt;		/* template string walked by b_output() */

static int	 rval;		/* exit status; bit 0x100 is set on \c */
static char	**gargv;	/* next operand still to be consumed */
89 #ifdef BUILTIN /* csh builtin */
90 #define main progprintf
91 #endif
92
93 #ifdef SHELL /* sh (aka ash) builtin */
94 #define main printfcmd
95 #include "../../bin/sh/bltin/bltin.h"
96 #endif /* SHELL */
97
/*
 * PF(f, func): printf() the value `func' using the conversion
 * specification `f'.  The caller's `fieldwidth' and `precision' are
 * passed as leading int arguments whenever they are not -1, so `f' must
 * contain a `*' for each one that is set.  The printf() return value is
 * stored in the caller's `error'.
 *
 * The macro relies on int variables named fieldwidth, precision and
 * error being in scope where it is expanded.  `func' is evaluated
 * exactly once.
 *
 * The body is wrapped in do { } while (0) so that `PF(f, x);' forms a
 * single statement and may be used as the unbraced body of an if/else.
 */
#define PF(f, func) do { \
	if (fieldwidth != -1) { \
		if (precision != -1) \
			error = printf(f, fieldwidth, precision, func); \
		else \
			error = printf(f, fieldwidth, func); \
	} else if (precision != -1) \
		error = printf(f, precision, func); \
	else \
		error = printf(f, func); \
} while (0)
109
/*
 * APF(cpp, f, func): like PF(), but formats into a freshly allocated
 * string with asprintf(); the string pointer is stored through `cpp'
 * and the asprintf() return value in the caller's `error'.
 *
 * The caller's `fieldwidth' and `precision' are passed as leading int
 * arguments whenever they are not -1.  The macro relies on int variables
 * named fieldwidth, precision and error being in scope where it is
 * expanded.  `func' is evaluated exactly once.
 *
 * The body is wrapped in do { } while (0) so that `APF(p, f, x);' forms
 * a single statement and may be used as the unbraced body of an if/else.
 */
#define APF(cpp, f, func) do { \
	if (fieldwidth != -1) { \
		if (precision != -1) \
			error = asprintf(cpp, f, fieldwidth, precision, func); \
		else \
			error = asprintf(cpp, f, fieldwidth, func); \
	} else if (precision != -1) \
		error = asprintf(cpp, f, precision, func); \
	else \
		error = asprintf(cpp, f, func); \
} while (0)
121
/* Non-zero when c is one of the octal digit characters '0'..'7'. */
#define isodigit(c)	('0' <= (c) && (c) <= '7')
/* Numeric value of the digit character c. */
#define octtobin(c)	((c) - '0')
/*
 * check(c, a) is deliberately only the front part of a conditional
 * expression ("cond ? value"); the user must supply the ": alternative".
 * It tests whether c lies in the six characters starting at a and, if
 * so, yields 10..15.
 */
#define check(c, a)	(a) <= (c) && (c) <= (a) + 5 ? 10 + ((c) - (a))
/*
 * Numeric value of the hexadecimal digit character c: 'a'..'f' and
 * 'A'..'F' give 10..15, anything else is treated as a decimal digit.
 * Note that c is evaluated several times.
 */
#define hextobin(c)	(check(c, 'a') : check(c, 'A') : (c) - '0')
126 #ifdef main
127 int main(int, char *[]);
128 #endif
129
130 int
131 main(int argc, char *argv[])
132 {
133 char *fmt, *start;
134 int fieldwidth, precision;
135 char nextch;
136 char *format;
137 char ch;
138 int error;
139
140 #if !defined(SHELL) && !defined(BUILTIN)
141 (void)setlocale (LC_ALL, "");
142 #endif
143
144 rval = 0; /* clear for builtin versions (avoid holdover) */
145 clearerr(stdout); /* for the builtin version */
146
147 /*
148 * printf does not comply with Posix XBD 12.2 - there are no opts,
149 * not even the -- end of options marker. Do not run getoot().
150 */
151 if (argc > 2 && strchr(argv[1], '%') == NULL) {
152 int o;
153
154 /*
155 * except that if there are multiple args and
156 * the first (the nominal format) contains no '%'
157 * conversions (which we will approximate as no '%'
158 * characters at all, conversions or not) then the
159 * results are unspecified, and we can do what we
160 * like. So in that case, for some backward compat
161 * to scripts which (stupidly) do:
162 * printf -- format args
163 * process this case the old way.
164 */
165
166 while ((o = getopt(argc, argv, "")) != -1) {
167 switch (o) {
168 case '?':
169 default:
170 usage();
171 return 1;
172 }
173 }
174 argc -= optind;
175 argv += optind;
176 } else {
177 argc -= 1; /* drop argv[0] (the program name) */
178 argv += 1;
179 }
180
181 if (argc < 1) {
182 usage();
183 return 1;
184 }
185
186 format = *argv;
187 gargv = ++argv;
188
189 #define SKIP1 "#-+ 0'"
190 #define SKIP2 "0123456789"
191 do {
192 /*
193 * Basic algorithm is to scan the format string for conversion
194 * specifications -- once one is found, find out if the field
195 * width or precision is a '*'; if it is, gather up value.
196 * Note, format strings are reused as necessary to use up the
197 * provided arguments, arguments of zero/null string are
198 * provided to use up the format string.
199 */
200
201 /* find next format specification */
202 for (fmt = format; (ch = *fmt++) != '\0';) {
203 if (ch == '\\') {
204 char c_ch;
205 fmt = conv_escape(fmt, &c_ch, 0);
206 putchar(c_ch);
207 continue;
208 }
209 if (ch != '%' || (*fmt == '%' && ++fmt)) {
210 (void)putchar(ch);
211 continue;
212 }
213
214 /*
215 * Ok - we've found a format specification,
216 * Save its address for a later printf().
217 */
218 start = fmt - 1;
219
220 /* skip to field width */
221 fmt += strspn(fmt, SKIP1);
222 if (*fmt == '*') {
223 fmt++;
224 fieldwidth = getwidth();
225 } else {
226 fieldwidth = -1;
227
228 /* skip to possible '.' for precision */
229 fmt += strspn(fmt, SKIP2);
230 }
231
232 if (*fmt == '.') {
233 /* get following precision */
234 fmt++;
235 if (*fmt == '*') {
236 fmt++;
237 precision = getwidth();
238 } else {
239 precision = -1;
240 fmt += strspn(fmt, SKIP2);
241 }
242 } else
243 precision = -1;
244
245 ch = *fmt;
246 if (!ch) {
247 warnx("%s: missing format character", start);
248 return 1;
249 }
250
251 /*
252 * null terminate format string to we can use it
253 * as an argument to printf.
254 */
255 nextch = fmt[1];
256 fmt[1] = 0;
257
258 switch (ch) {
259
260 case 'B': {
261 const char *p = conv_expand(getstr());
262
263 if (p == NULL)
264 goto out;
265 *fmt = 's';
266 PF(start, p);
267 if (error < 0)
268 goto out;
269 break;
270 }
271 case 'b': {
272 /*
273 * There has to be a better way to do this,
274 * but the string we generate might have
275 * embedded nulls
276 */
277 static char *a, *t;
278 char *cp = getstr();
279
280 /* Free on entry in case shell longjumped out */
281 if (a != NULL)
282 free(a);
283 a = NULL;
284 if (t != NULL)
285 free(t);
286 t = NULL;
287
288 /* Count number of bytes we want to output */
289 b_length = 0;
290 conv_escape_str(cp, b_count, 0);
291 t = malloc(b_length + 1);
292 if (t == NULL)
293 goto out;
294 (void)memset(t, 'x', b_length);
295 t[b_length] = 0;
296
297 /* Get printf to calculate the lengths */
298 *fmt = 's';
299 APF(&a, start, t);
300 if (error == -1)
301 goto out;
302 b_fmt = a;
303
304 /* Output leading spaces and data bytes */
305 conv_escape_str(cp, b_output, 1);
306
307 /* Add any trailing spaces */
308 printf("%s", b_fmt);
309 break;
310 }
311 case 'c': {
312 char p = getchr();
313
314 PF(start, p);
315 if (error < 0)
316 goto out;
317 break;
318 }
319 case 's': {
320 char *p = getstr();
321
322 PF(start, p);
323 if (error < 0)
324 goto out;
325 break;
326 }
327 case 'd':
328 case 'i': {
329 intmax_t p = getintmax();
330 char *f = mklong(start, ch);
331
332 PF(f, p);
333 if (error < 0)
334 goto out;
335 break;
336 }
337 case 'o':
338 case 'u':
339 case 'x':
340 case 'X': {
341 uintmax_t p = (uintmax_t)getintmax();
342 char *f = mklong(start, ch);
343
344 PF(f, p);
345 if (error < 0)
346 goto out;
347 break;
348 }
349 case 'a':
350 case 'A':
351 case 'e':
352 case 'E':
353 case 'f':
354 case 'F':
355 case 'g':
356 case 'G': {
357 double p = getdouble();
358
359 PF(start, p);
360 if (error < 0)
361 goto out;
362 break;
363 }
364 case '%':
365 /* Don't ask, but this is useful ... */
366 if (fieldwidth == 'N' && precision == 'B')
367 return 0;
368 /* FALLTHROUGH */
369 default:
370 warnx("%s: invalid directive", start);
371 return 1;
372 }
373 *fmt++ = ch;
374 *fmt = nextch;
375 /* escape if a \c was encountered */
376 if (rval & 0x100)
377 goto done;
378 }
379 } while (gargv != argv && *gargv);
380
381 done:
382 (void)fflush(stdout);
383 if (ferror(stdout)) {
384 clearerr(stdout);
385 err(1, "write error");
386 }
387 return rval & ~0x100;
388 out:
389 warn("print failed");
390 return 1;
391 }
392
393 /* helper functions for conv_escape_str */
394
395 static void
396 /*ARGSUSED*/
397 b_count(int ch)
398 {
399 b_length++;
400 }
401
402 /* Output one converted character for every 'x' in the 'format' */
403
404 static void
405 b_output(int ch)
406 {
407 for (;;) {
408 switch (*b_fmt++) {
409 case 0:
410 b_fmt--;
411 return;
412 case ' ':
413 putchar(' ');
414 break;
415 default:
416 putchar(ch);
417 return;
418 }
419 }
420 }
421
422
423 /*
424 * Print SysV echo(1) style escape string
425 * Halts processing string if a \c escape is encountered.
426 */
427 static void
428 conv_escape_str(char *str, void (*do_putchar)(int), int quiet)
429 {
430 int value;
431 int ch;
432 char c;
433
434 while ((ch = *str++) != '\0') {
435 if (ch != '\\') {
436 do_putchar(ch);
437 continue;
438 }
439
440 ch = *str++;
441 if (ch == 'c') {
442 /* \c as in SYSV echo - abort all processing.... */
443 rval |= 0x100;
444 break;
445 }
446
447 /*
448 * %b string octal constants are not like those in C.
449 * They start with a \0, and are followed by 0, 1, 2,
450 * or 3 octal digits.
451 */
452 if (ch == '0') {
453 int octnum = 0, i;
454 for (i = 0; i < 3; i++) {
455 if (!isdigit((unsigned char)*str) || *str > '7')
456 break;
457 octnum = (octnum << 3) | (*str++ - '0');
458 }
459 do_putchar(octnum);
460 continue;
461 }
462
463 /* \[M][^|-]C as defined by vis(3) */
464 if (ch == 'M' && *str == '-') {
465 do_putchar(0200 | str[1]);
466 str += 2;
467 continue;
468 }
469 if (ch == 'M' && *str == '^') {
470 str++;
471 value = 0200;
472 ch = '^';
473 } else
474 value = 0;
475 if (ch == '^') {
476 ch = *str++;
477 if (ch == '?')
478 value |= 0177;
479 else
480 value |= ch & 037;
481 do_putchar(value);
482 continue;
483 }
484
485 /* Finally test for sequences valid in the format string */
486 str = conv_escape(str - 1, &c, quiet);
487 do_putchar(c);
488 }
489 }
490
491 /*
492 * Print "standard" escape characters
493 */
494 static char *
495 conv_escape(char *str, char *conv_ch, int quiet)
496 {
497 int value = 0;
498 char ch, *begin;
499 int c;
500
501 ch = *str++;
502
503 switch (ch) {
504 case '\0':
505 if (!quiet)
506 warnx("incomplete escape sequence");
507 rval = 1;
508 value = '\\';
509 --str;
510 break;
511
512 case '0': case '1': case '2': case '3':
513 case '4': case '5': case '6': case '7':
514 str--;
515 for (c = 3; c-- && isodigit(*str); str++) {
516 value <<= 3;
517 value += octtobin(*str);
518 }
519 break;
520
521 case 'x':
522 /*
523 * Hexadecimal character constants are not required to be
524 * supported (by SuS v1) because there is no consistent
525 * way to detect the end of the constant.
526 * Supporting 2 byte constants is a compromise.
527 */
528 begin = str;
529 for (c = 2; c-- && isxdigit((unsigned char)*str); str++) {
530 value <<= 4;
531 value += hextobin(*str);
532 }
533 if (str == begin) {
534 if (!quiet)
535 warnx("\\x%s: missing hexadecimal number "
536 "in escape", begin);
537 rval = 1;
538 }
539 break;
540
541 case '\\': value = '\\'; break; /* backslash */
542 case '\'': value = '\''; break; /* single quote */
543 case '"': value = '"'; break; /* double quote */
544 case 'a': value = '\a'; break; /* alert */
545 case 'b': value = '\b'; break; /* backspace */
546 case 'e': value = ESCAPE; break; /* escape */
547 case 'E': value = ESCAPE; break; /* escape */
548 case 'f': value = '\f'; break; /* form-feed */
549 case 'n': value = '\n'; break; /* newline */
550 case 'r': value = '\r'; break; /* carriage-return */
551 case 't': value = '\t'; break; /* tab */
552 case 'v': value = '\v'; break; /* vertical-tab */
553
554 default:
555 if (!quiet)
556 warnx("unknown escape sequence `\\%c'", ch);
557 rval = 1;
558 value = ch;
559 break;
560 }
561
562 *conv_ch = (char)value;
563 return str;
564 }
565
566 /* expand a string so that everything is printable */
567
568 static char *
569 conv_expand(const char *str)
570 {
571 static char *conv_str;
572 char *cp;
573 char ch;
574
575 if (conv_str)
576 free(conv_str);
577 /* get a buffer that is definitely large enough.... */
578 conv_str = malloc(4 * strlen(str) + 1);
579 if (!conv_str)
580 return NULL;
581 cp = conv_str;
582
583 while ((ch = *(const char *)str++) != '\0') {
584 switch (ch) {
585 /* Use C escapes for expected control characters */
586 case '\\': ch = '\\'; break; /* backslash */
587 case '\'': ch = '\''; break; /* single quote */
588 case '"': ch = '"'; break; /* double quote */
589 case '\a': ch = 'a'; break; /* alert */
590 case '\b': ch = 'b'; break; /* backspace */
591 case ESCAPE: ch = 'e'; break; /* escape */
592 case '\f': ch = 'f'; break; /* form-feed */
593 case '\n': ch = 'n'; break; /* newline */
594 case '\r': ch = 'r'; break; /* carriage-return */
595 case '\t': ch = 't'; break; /* tab */
596 case '\v': ch = 'v'; break; /* vertical-tab */
597 default:
598 /* Copy anything printable */
599 if (isprint((unsigned char)ch)) {
600 *cp++ = ch;
601 continue;
602 }
603 /* Use vis(3) encodings for the rest */
604 *cp++ = '\\';
605 if (ch & 0200) {
606 *cp++ = 'M';
607 ch &= (char)~0200;
608 }
609 if (ch == 0177) {
610 *cp++ = '^';
611 *cp++ = '?';
612 continue;
613 }
614 if (ch < 040) {
615 *cp++ = '^';
616 *cp++ = ch | 0100;
617 continue;
618 }
619 *cp++ = '-';
620 *cp++ = ch;
621 continue;
622 }
623 *cp++ = '\\';
624 *cp++ = ch;
625 }
626
627 *cp = 0;
628 return conv_str;
629 }
630
631 static char *
632 mklong(const char *str, char ch)
633 {
634 static char copy[64];
635 size_t len;
636
637 len = strlen(str) + 2;
638 if (len > sizeof copy) {
639 warnx("format \"%s\" too complex", str);
640 len = 4;
641 rval = 1;
642 }
643 (void)memmove(copy, str, len - 3);
644 copy[len - 3] = 'j';
645 copy[len - 2] = ch;
646 copy[len - 1] = '\0';
647 return copy;
648 }
649
650 static char
651 getchr(void)
652 {
653 if (!*gargv)
654 return 0;
655 return **gargv++;
656 }
657
658 static char *
659 getstr(void)
660 {
661 static char empty[] = "";
662 if (!*gargv)
663 return empty;
664 return *gargv++;
665 }
666
667 static int
668 getwidth(void)
669 {
670 unsigned long val;
671 char *s, *ep;
672
673 s = *gargv;
674 if (s == NULL)
675 return 0;
676 gargv++;
677
678 errno = 0;
679 val = strtoul(s, &ep, 0);
680 check_conversion(s, ep);
681
682 /* Arbitrarily 'restrict' field widths to 1Mbyte */
683 if (val > 1 << 20) {
684 warnx("%s: invalid field width", s);
685 return 0;
686 }
687
688 return (int)val;
689 }
690
691 static intmax_t
692 getintmax(void)
693 {
694 intmax_t val;
695 char *cp, *ep;
696
697 cp = *gargv;
698 if (cp == NULL)
699 return 0;
700 gargv++;
701
702 if (*cp == '\"' || *cp == '\'')
703 return wide_char(cp);
704
705 errno = 0;
706 val = strtoimax(cp, &ep, 0);
707 check_conversion(cp, ep);
708 return val;
709 }
710
711 static double
712 getdouble(void)
713 {
714 double val;
715 char *ep;
716
717 if (!*gargv)
718 return 0.0;
719
720 /* This is a NetBSD extension, not required by POSIX (it is useless) */
721 if (*(ep = *gargv) == '\"' || *ep == '\'')
722 return (double)wide_char(ep);
723
724 errno = 0;
725 val = strtod(*gargv, &ep);
726 check_conversion(*gargv++, ep);
727 return val;
728 }
729
730 /*
731 * XXX This is just a placeholder for a later version which
732 * will do mbtowc() on p+1 (and after checking that all of the
733 * string has been consumed) return that value.
734 *
735 * This (mbtowc) behaviour is required by POSIX (as is the check
736 * that the whole arg is consumed).
737 *
738 * What follows is actually correct if we assume that LC_CTYPE=C
739 * (or something else similar that is a single byte charset).
740 */
741 static intmax_t
742 wide_char(const char *p)
743 {
744 intmax_t ch = (intmax_t)(unsigned char)p[1];
745
746 if (ch != 0 && p[2] != '\0') {
747 warnx("%s: not completely converted", p);
748 rval = 1;
749 }
750
751 return ch;
752 }
753
754 static void
755 check_conversion(const char *s, const char *ep)
756 {
757 if (*ep) {
758 if (ep == s)
759 warnx("%s: expected numeric value", s);
760 else
761 warnx("%s: not completely converted", s);
762 rval = 1;
763 } else if (errno == ERANGE) {
764 warnx("%s: %s", s, strerror(ERANGE));
765 rval = 1;
766 }
767 }
768
/* Write the one-line usage message to standard error. */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr, "Usage: %s format [arg ...]\n", prog);
}
774