printf.c revision 1.59 1 /* $NetBSD: printf.c,v 1.59 2024/11/24 12:33:00 kre Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 #if !defined(BUILTIN) && !defined(SHELL)
35 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
36 The Regents of the University of California. All rights reserved.");
37 #endif
38 #endif
39
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)printf.c 8.2 (Berkeley) 3/22/95";
43 #else
44 __RCSID("$NetBSD: printf.c,v 1.59 2024/11/24 12:33:00 kre Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <sys/types.h>
49
50 #include <ctype.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <inttypes.h>
54 #include <limits.h>
55 #include <locale.h>
56 #include <stdarg.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61
/*
 * Value of the ESC control character.  GNU-compatible compilers accept
 * the '\e' character escape; everyone else gets the octal value.
 */
#if defined(__GNUC__)
#define ESCAPE	'\e'
#else
#define ESCAPE	033
#endif
67
/* Escape sequence handling */
static void	 conv_escape_str(char *str, void (*do_putchar)(int), int quiet);
static char	*conv_escape(char *str, char *conv_ch, int quiet);
static char	*conv_expand(const char *str);

/* Fetching (and converting) the next operand */
static wchar_t	 getchr(void);
static long double getdouble(void);
static int	 getwidth(void);
static intmax_t	 getintmax(void);
static char	*getstr(void);

/* Miscellaneous helpers */
static char	*mklong(const char *str, char ch, char longer);
static intmax_t	 wide_char(const char *p, int all);
static void	 check_conversion(const char *s, const char *ep);
static void	 usage(void);

/* State shared between the two passes made over a %b argument */
static void	 b_count(int ch);
static void	 b_output(int ch);
static size_t	 b_length;
static char	*b_fmt;

/* Exit status being accumulated; bit 0x100 records that \c was seen */
static int	 rval;
/* gargv: next operand to consume; firstarg: the first operand */
static char	**gargv, **firstarg;
/* Set by the -L option: floating conversions are done as long double */
static int	 long_double;

/* Number of operands consumed so far (1-based index of the latest one) */
#define ARGNUM	((int)(gargv - firstarg))
91
/*
 * When compiled into a shell, the entry point is renamed so that it
 * does not clash with the shell's own main().
 */
#if defined(BUILTIN)	/* csh builtin */
#define main progprintf
#endif

#if defined(SHELL)	/* sh (aka ash) builtin */
#define main printfcmd
#include "../../bin/sh/bltin/bltin.h"
#endif	/* SHELL */
100
/*
 * Call printf() with format f and value func, supplying the '*' field
 * width and/or precision arguments when they were given (i.e. are not -1).
 * Uses the caller's local variables fieldwidth and precision, and stores
 * printf()'s return value in the caller's local variable error.
 *
 * Wrapped in do { } while (0) so that "PF(f, v);" is a single statement
 * and is safe as the unbraced body of an if/else.
 */
#define PF(f, func) do {						\
	if (fieldwidth != -1) {						\
		if (precision != -1)					\
			error = printf(f, fieldwidth, precision, func);	\
		else							\
			error = printf(f, fieldwidth, func);		\
	} else if (precision != -1)					\
		error = printf(f, precision, func);			\
	else								\
		error = printf(f, func);				\
} while (0)
112
/*
 * As PF(), but formats into a newly allocated string using asprintf():
 * *cpp receives the buffer and the caller's local variable error receives
 * asprintf()'s return value.  The '*' field width and/or precision are
 * supplied from the caller's fieldwidth / precision when they are not -1.
 *
 * Wrapped in do { } while (0) so that "APF(&p, f, v);" is a single
 * statement and is safe as the unbraced body of an if/else.
 */
#define APF(cpp, f, func) do {						\
	if (fieldwidth != -1) {						\
		if (precision != -1)					\
			error = asprintf(cpp, f, fieldwidth, precision, func); \
		else							\
			error = asprintf(cpp, f, fieldwidth, func);	\
	} else if (precision != -1)					\
		error = asprintf(cpp, f, precision, func);		\
	else								\
		error = asprintf(cpp, f, func);				\
} while (0)
124
/* True when c is an octal digit */
#define isodigit(c)	('0' <= (c) && (c) <= '7')
/* Value of the octal (or decimal) digit c */
#define octtobin(c)	((c) - '0')
/*
 * Fragment used only to build hextobin(): "c is one of the six letters
 * starting at a ? its hex value" -- the ':' alternative is supplied by
 * the user of the fragment.
 */
#define check(c, a)	(a) <= (c) && (c) <= (a) + 5 ? 10 + (c) - (a)
/* Value of the hexadecimal digit c (either case); c must be a hex digit */
#define hextobin(c)	(check(c, 'a') : check(c, 'A') : (c) - '0')
129
130 #ifdef main
131 int main(int, char *[]);
132 #endif
133
134 int
135 main(int argc, char *argv[])
136 {
137 char *fmt, *start;
138 int fieldwidth, precision;
139 char nextch;
140 char *format;
141 char ch;
142 int error;
143
144 #if !defined(SHELL) && !defined(BUILTIN)
145 (void)setlocale (LC_ALL, "");
146 #endif
147
148 rval = 0; /* clear for builtin versions (avoid holdover) */
149 long_double = 0;
150 clearerr(stdout); /* for the builtin version */
151
152 if (argc > 2 && strchr(argv[1], '%') == NULL) {
153 int o;
154
155 /*
156 * We only do this for argc > 2, as:
157 *
158 * for argc <= 1
159 * at best we have a bare "printf" so there cannot be
160 * any options, thus getopts() would be a waste of time.
161 * The usage() below is assured.
162 *
163 * for argc == 2
164 * There is only one arg (argv[1]) which logically must
165 * be intended to be the (required) format string for
166 * printf, without which we can do nothing so rather
167 * than usage() if it happens to start with a '-' we
168 * just avoid getopts() and treat it as a format string.
169 *
170 * Then, for argc > 2, we also skip this if there is a '%'
171 * anywhere in argv[1] as it is likely that would be intended
172 * to be the format string, rather than options, even if it
173 * starts with a '-' so we skip getopts() in that case as well.
174 *
175 * Note that this would fail should there ever be an option
176 * which takes an arbitrary string value, which could be given
177 * as -Oabc%def so should that ever become possible, remove
178 * the strchr() test above.
179 */
180
181 while ((o = getopt(argc, argv, "L")) != -1) {
182 switch (o) {
183 case 'L':
184 long_double = 1;
185 break;
186 case '?':
187 default:
188 usage();
189 return 1;
190 }
191 }
192 argc -= optind;
193 argv += optind;
194 } else {
195 argc -= 1; /* drop argv[0] (the program name) */
196 argv += 1;
197 }
198
199 if (argc < 1) { /* Nothing left at all? */
200 usage();
201 return 1;
202 }
203
204 format = *argv; /* First remaining arg is the format string */
205 firstarg = gargv = ++argv; /* remaining args are for that to consume */
206
207 #define SKIP1 "#-+ 0'"
208 #define SKIP2 "0123456789"
209 do {
210 /*
211 * Basic algorithm is to scan the format string for conversion
212 * specifications -- once one is found, find out if the field
213 * width or precision is a '*'; if it is, gather up value.
214 * Note, format strings are reused as necessary to use up the
215 * provided arguments, arguments of zero/null string are
216 * provided to use up the format string.
217 */
218
219 /* find next format specification */
220 for (fmt = format; (ch = *fmt++) != '\0';) {
221 if (ch == '\\') {
222 char c_ch;
223
224 fmt = conv_escape(fmt, &c_ch, 0);
225 putchar(c_ch);
226 continue;
227 }
228 if (ch != '%' || (*fmt == '%' && ++fmt)) {
229 (void)putchar(ch);
230 continue;
231 }
232
233 /*
234 * Ok - we've found a format specification,
235 * Save its address for a later printf().
236 */
237 start = fmt - 1;
238
239 /* skip to field width */
240 fmt += strspn(fmt, SKIP1);
241 if (*fmt == '*') {
242 fmt++;
243 fieldwidth = getwidth();
244 } else {
245 fieldwidth = -1;
246
247 /* skip to possible '.' for precision */
248 fmt += strspn(fmt, SKIP2);
249 }
250
251 if (*fmt == '.') {
252 /* get following precision */
253 fmt++;
254 if (*fmt == '*') {
255 fmt++;
256 precision = getwidth();
257 } else {
258 precision = -1;
259 fmt += strspn(fmt, SKIP2);
260 }
261 } else
262 precision = -1;
263
264 ch = *fmt;
265 if (!ch) {
266 warnx("%s: missing format character", start);
267 return 1;
268 }
269
270 /*
271 * null terminate format string to we can use it
272 * as an argument to printf.
273 */
274 nextch = fmt[1];
275 fmt[1] = 0;
276
277 switch (ch) {
278
279 case 'B': {
280 const char *p = conv_expand(getstr());
281
282 if (p == NULL)
283 goto out;
284 *fmt = 's';
285 PF(start, p);
286 if (error < 0)
287 goto out;
288 break;
289 }
290 case 'b': {
291 /*
292 * There has to be a better way to do this,
293 * but the string we generate might have
294 * embedded nulls
295 */
296 static char *a, *t;
297 char *cp = getstr();
298
299 /* Free on entry in case shell longjumped out */
300 if (a != NULL)
301 free(a);
302 a = NULL;
303 if (t != NULL)
304 free(t);
305 t = NULL;
306
307 /* Count number of bytes we want to output */
308 b_length = 0;
309 conv_escape_str(cp, b_count, 0);
310 t = malloc(b_length + 1);
311 if (t == NULL)
312 goto out;
313 (void)memset(t, 'x', b_length);
314 t[b_length] = 0;
315
316 /* Get printf to calculate the lengths */
317 *fmt = 's';
318 APF(&a, start, t);
319 if (error == -1)
320 goto out;
321 b_fmt = a;
322
323 /* Output leading spaces and data bytes */
324 conv_escape_str(cp, b_output, 1);
325
326 /* Add any trailing spaces */
327 printf("%s", b_fmt);
328 break;
329 }
330 case 'C': {
331 wchar_t p = (wchar_t)getintmax();
332 char *f = mklong(start, 'c', 'l');
333
334 PF(f, p);
335 if (error < 0)
336 goto out;
337 break;
338 }
339 case 'c': {
340 wchar_t p = getchr();
341 char *f = mklong(start, ch, 'l');
342
343 PF(f, p);
344 if (error < 0)
345 goto out;
346 break;
347 }
348 case 's': {
349 char *p = getstr();
350
351 PF(start, p);
352 if (error < 0)
353 goto out;
354 break;
355 }
356 case 'd':
357 case 'i': {
358 intmax_t p = getintmax();
359 char *f = mklong(start, ch, 'j');
360
361 PF(f, p);
362 if (error < 0)
363 goto out;
364 break;
365 }
366 case 'o':
367 case 'u':
368 case 'x':
369 case 'X': {
370 uintmax_t p = (uintmax_t)getintmax();
371 char *f = mklong(start, ch, 'j');
372
373 PF(f, p);
374 if (error < 0)
375 goto out;
376 break;
377 }
378 case 'a':
379 case 'A':
380 case 'e':
381 case 'E':
382 case 'f':
383 case 'F':
384 case 'g':
385 case 'G': {
386 long double p = getdouble();
387
388 if (long_double) {
389 char * f = mklong(start, ch, 'L');
390 PF(f, p);
391 } else {
392 double pp = (double)p;
393 PF(start, pp);
394 }
395 if (error < 0)
396 goto out;
397 break;
398 }
399 case '%':
400 /* Don't ask, but this is useful ... */
401 if (fieldwidth == 'N' && precision == 'B')
402 return 0;
403 /* FALLTHROUGH */
404 default:
405 warnx("%s: invalid directive", start);
406 return 1;
407 }
408 *fmt++ = ch;
409 *fmt = nextch;
410 /* escape if a \c was encountered */
411 if (rval & 0x100)
412 goto done;
413 }
414 } while (gargv != argv && *gargv);
415
416 done:;
417 (void)fflush(stdout);
418 if (ferror(stdout)) {
419 clearerr(stdout);
420 err(1, "write error");
421 }
422 return rval & ~0x100;
423 out:;
424 warn("print failed");
425 return 1;
426 }
427
428 /* helper functions for conv_escape_str */
429
430 static void
431 /*ARGSUSED*/
432 b_count(int ch)
433 {
434 b_length++;
435 }
436
437 /* Output one converted character for every 'x' in the 'format' */
438
439 static void
440 b_output(int ch)
441 {
442 for (;;) {
443 switch (*b_fmt++) {
444 case 0:
445 b_fmt--;
446 return;
447 case ' ':
448 putchar(' ');
449 break;
450 default:
451 putchar(ch);
452 return;
453 }
454 }
455 }
456
457
458 /*
459 * Print SysV echo(1) style escape string
460 * Halts processing string if a \c escape is encountered.
461 */
462 static void
463 conv_escape_str(char *str, void (*do_putchar)(int), int quiet)
464 {
465 int value;
466 int ch;
467 char c;
468
469 while ((ch = *str++) != '\0') {
470 if (ch != '\\') {
471 do_putchar(ch);
472 continue;
473 }
474
475 ch = *str++;
476 if (ch == 'c') {
477 /* \c as in SYSV echo - abort all processing.... */
478 rval |= 0x100;
479 break;
480 }
481
482 /*
483 * %b string octal constants are not like those in C.
484 * They start with a \0, and are followed by 0, 1, 2,
485 * or 3 octal digits.
486 */
487 if (ch == '0') {
488 int octnum = 0, i;
489
490 for (i = 0; i < 3; i++) {
491 if (!isdigit((unsigned char)*str) || *str > '7')
492 break;
493 octnum = (octnum << 3) | (*str++ - '0');
494 }
495 do_putchar(octnum);
496 continue;
497 }
498
499 /* \[M][^|-]C as defined by vis(3) */
500 if (ch == 'M' && *str == '-') {
501 do_putchar(0200 | str[1]);
502 str += 2;
503 continue;
504 }
505 if (ch == 'M' && *str == '^') {
506 str++;
507 value = 0200;
508 ch = '^';
509 } else
510 value = 0;
511 if (ch == '^') {
512 ch = *str++;
513 if (ch == '?')
514 value |= 0177;
515 else
516 value |= ch & 037;
517 do_putchar(value);
518 continue;
519 }
520
521 /* Finally test for sequences valid in the format string */
522 str = conv_escape(str - 1, &c, quiet);
523 do_putchar(c);
524 }
525 }
526
527 /*
528 * Print "standard" escape characters
529 */
530 static char *
531 conv_escape(char *str, char *conv_ch, int quiet)
532 {
533 int value = 0;
534 char ch, *begin;
535 int c;
536
537 ch = *str++;
538
539 switch (ch) {
540 case '\0':
541 if (!quiet)
542 warnx("incomplete escape sequence");
543 rval = 1;
544 value = '\\';
545 --str;
546 break;
547
548 case '0': case '1': case '2': case '3':
549 case '4': case '5': case '6': case '7':
550 str--;
551 for (c = 3; c-- && isodigit(*str); str++) {
552 value <<= 3;
553 value += octtobin(*str);
554 }
555 break;
556
557 case 'x':
558 /*
559 * Hexadecimal character constants are not required to be
560 * supported (by SuS v1) because there is no consistent
561 * way to detect the end of the constant.
562 * Supporting 2 byte constants is a compromise.
563 */
564 begin = str;
565 for (c = 2; c-- && isxdigit((unsigned char)*str); str++) {
566 value <<= 4;
567 value += hextobin(*str);
568 }
569 if (str == begin) {
570 if (!quiet)
571 warnx("\\x%s: missing hexadecimal number "
572 "in escape", begin);
573 rval = 1;
574 }
575 break;
576
577 case '\\': value = '\\'; break; /* backslash */
578 case '\'': value = '\''; break; /* single quote */
579 case '"': value = '"'; break; /* double quote */
580 case 'a': value = '\a'; break; /* alert */
581 case 'b': value = '\b'; break; /* backspace */
582 case 'e': value = ESCAPE; break; /* escape */
583 case 'E': value = ESCAPE; break; /* escape */
584 case 'f': value = '\f'; break; /* form-feed */
585 case 'n': value = '\n'; break; /* newline */
586 case 'r': value = '\r'; break; /* carriage-return */
587 case 't': value = '\t'; break; /* tab */
588 case 'v': value = '\v'; break; /* vertical-tab */
589
590 default:
591 if (!quiet)
592 warnx("unknown escape sequence `\\%c'", ch);
593 rval = 1;
594 value = ch;
595 break;
596 }
597
598 *conv_ch = (char)value;
599 return str;
600 }
601
602 /* expand a string so that everything is printable */
603
604 static char *
605 conv_expand(const char *str)
606 {
607 static char *conv_str;
608 char *cp;
609 char ch;
610
611 if (conv_str)
612 free(conv_str);
613 /* get a buffer that is definitely large enough.... */
614 conv_str = malloc(4 * strlen(str) + 1);
615 if (!conv_str)
616 return NULL;
617 cp = conv_str;
618
619 while ((ch = *(const char *)str++) != '\0') {
620 switch (ch) {
621 /* Use C escapes for expected control characters */
622 case '\\': ch = '\\'; break; /* backslash */
623 case '\'': ch = '\''; break; /* single quote */
624 case '"': ch = '"'; break; /* double quote */
625 case '\a': ch = 'a'; break; /* alert */
626 case '\b': ch = 'b'; break; /* backspace */
627 case ESCAPE: ch = 'e'; break; /* escape */
628 case '\f': ch = 'f'; break; /* form-feed */
629 case '\n': ch = 'n'; break; /* newline */
630 case '\r': ch = 'r'; break; /* carriage-return */
631 case '\t': ch = 't'; break; /* tab */
632 case '\v': ch = 'v'; break; /* vertical-tab */
633 default:
634 /* Copy anything printable */
635 if (isprint((unsigned char)ch)) {
636 *cp++ = ch;
637 continue;
638 }
639 /* Use vis(3) encodings for the rest */
640 *cp++ = '\\';
641 if (ch & 0200) {
642 *cp++ = 'M';
643 ch &= (char)~0200;
644 }
645 if (ch == 0177) {
646 *cp++ = '^';
647 *cp++ = '?';
648 continue;
649 }
650 if (ch < 040) {
651 *cp++ = '^';
652 *cp++ = ch | 0100;
653 continue;
654 }
655 *cp++ = '-';
656 *cp++ = ch;
657 continue;
658 }
659 *cp++ = '\\';
660 *cp++ = ch;
661 }
662
663 *cp = 0;
664 return conv_str;
665 }
666
667 static char *
668 mklong(const char *str, char ch, char longer)
669 {
670 static char copy[64];
671 size_t len;
672
673 len = strlen(str) + 2;
674 if (len > sizeof copy) {
675 warnx("format \"%s\" too complex", str);
676 len = 4;
677 rval = 1;
678 }
679 (void)memmove(copy, str, len - 3);
680 copy[len - 3] = longer;
681 copy[len - 2] = ch;
682 copy[len - 1] = '\0';
683 return copy;
684 }
685
686 static wchar_t
687 getchr(void)
688 {
689 if (!*gargv)
690 return 0;
691 return (wchar_t)wide_char(*gargv++, 0);
692 }
693
694 static char *
695 getstr(void)
696 {
697 static char empty[] = "";
698
699 if (!*gargv)
700 return empty;
701 return *gargv++;
702 }
703
704 static int
705 getwidth(void)
706 {
707 unsigned long val;
708 char *s, *ep;
709
710 s = *gargv;
711 if (s == NULL)
712 return 0;
713 gargv++;
714
715 errno = 0;
716 val = strtoul(s, &ep, 0);
717 if (!isdigit(*(unsigned char *)s)) {
718 warnx("Arg %d: '%s' value for '*' width/precision"
719 " must be an unsigned integer", ARGNUM, s);
720 rval = 1;
721 val = 0;
722 } else
723 check_conversion(s, ep);
724
725 /* Arbitrarily 'restrict' field widths to 1Mbyte */
726 if (val > 1 << 20) {
727 warnx("Arg %d: %s: invalid field width/precision", ARGNUM, s);
728 rval = 1;
729 return 0;
730 }
731
732 return (int)val;
733 }
734
735 static intmax_t
736 getintmax(void)
737 {
738 intmax_t val;
739 char *cp, *ep;
740
741 cp = *gargv;
742 if (cp == NULL)
743 return 0;
744 gargv++;
745
746 if (*cp == '\"' || *cp == '\'')
747 return wide_char(cp, 1);
748
749 errno = 0;
750 val = strtoimax(cp, &ep, 0);
751 if (*cp != '+' && *cp != '-' && !isdigit(*(unsigned char *)cp)) {
752 warnx("Arg %d: '%s' numeric value required", ARGNUM, cp);
753 rval = 1;
754 } else
755 check_conversion(cp, ep);
756 return val;
757 }
758
759 static long double
760 getdouble(void)
761 {
762 long double val;
763 char *ep;
764
765 if (!*gargv)
766 return 0.0;
767
768 /* This is a NetBSD extension, not required by POSIX (it is useless) */
769 if (*(ep = *gargv) == '\"' || *ep == '\'')
770 return (long double)wide_char(ep, 1);
771
772 errno = 0;
773 val = strtold(*gargv, &ep);
774 check_conversion(*gargv++, ep);
775 return val;
776 }
777
778 /*
779 * Fetch a wide character from the string given
780 *
781 * if all that character must consume the entire string
782 * after an initial leading byte (ascii char) is ignored,
783 * (used for parsing intger args using the 'X syntax)
784 *
785 * if !all then there is no requirement that the whole
786 * string be consumed (remaining characters are just ignored)
787 * but the character is to start at *p.
788 * (used for fetching the first chartacter of a string arg for %c)
789 */
790 static intmax_t
791 wide_char(const char *p, int all)
792 {
793 wchar_t wch;
794 size_t len;
795 int n;
796
797 (void)mbtowc(NULL, NULL, 0);
798 n = mbtowc(&wch, p + all, (len = strlen(p + all)) + 1);
799 if (n < 0) {
800 warn("Arg %d: %s", ARGNUM, p);
801 rval = 1;
802 } else if (all && (size_t)n != len) {
803 warnx("Arg %d: %s: not completely converted",
804 ARGNUM, p);
805 rval = 1;
806 }
807
808 return (intmax_t) wch;
809 }
810
811 static void
812 check_conversion(const char *s, const char *ep)
813 {
814 if (!*s) {
815 warnx("Arg %d: unexpected empty value ('')", ARGNUM);
816 rval = 1;
817 return;
818 }
819
820 if (*ep) {
821 if (ep == s)
822 warnx("Arg %d: %s: numeric value expected", ARGNUM, s);
823 else
824 warnx("Arg %d: %s: not completely converted",
825 ARGNUM, s);
826 rval = 1;
827 return;
828 }
829
830 if (errno == ERANGE) {
831 warnx("Arg %d: %s: %s", ARGNUM, s, strerror(ERANGE));
832 rval = 1;
833 }
834 }
835
/* Write the one line usage message to standard error. */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr, "Usage: %s [-L] format [arg ...]\n", prog);
}
842