Home | History | Annotate | Line # | Download | only in fmt
      1 /*	$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #ifndef lint
     34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
     35  The Regents of the University of California.  All rights reserved.");
     36 #endif /* not lint */
     37 
     38 #ifndef lint
     39 #if 0
     40 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
     41 #endif
     42 __RCSID("$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $");
     43 #endif /* not lint */
     44 
     45 #include <wctype.h>
     46 #include <locale.h>
     47 #include <stdio.h>
     48 #include <stdlib.h>
     49 #include <unistd.h>
     50 #include <errno.h>
     51 #include <err.h>
     52 #include <limits.h>
     53 #include <string.h>
     54 #include <locale.h>
     55 #include "buffer.h"
     56 
     57 /*
     58  * fmt -- format the concatenation of input files or standard input
     59  * onto standard output.  Designed for use with Mail ~|
     60  *
     61  * Syntax : fmt [ goal [ max ] ] [ name ... ]
     62  * Authors: Kurt Shoens (UCB) 12/7/78;
     63  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
     64  */
     65 
/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
#define GOAL_LENGTH 65		/* Default for goal_length */
#define MAX_LENGTH 75		/* Default for max_length */
static size_t	goal_length;	/* Target or goal line length in output */
static size_t	max_length;	/* Max line length in output */
static size_t	pfx;		/* Current leading blank count */
static int	raw;		/* Don't treat mail specially */
static int	lineno;		/* Current input line */
static int	mark;		/* Last place we saw a head line */
static int	center;		/* Nonzero when -C was given: center lines */
static struct buffer outbuf;	/* Output line currently being assembled */

/*
 * Header names that prefix() looks for on the lines directly following
 * a head line.  The list is NULL-terminated.
 */
static const wchar_t *headnames[] = { L"To", L"Subject", L"Cc", NULL };

static void	usage(void) __dead;
static int 	getnum(const char *, const char *, size_t *, int);
static void	fmt(FILE *);
static int	ispref(const wchar_t *, const wchar_t *);
static void	leadin(void);
static void	oflush(void);
static void	pack(const wchar_t *, size_t);
static void	prefix(const struct buffer *, int);
static void	split(const wchar_t *, int);
static void	tabulate(struct buffer *);


/*
 * Not static: the definition is outside the part of the file shown
 * here (presumably in a separate source file -- confirm).
 */
int		ishead(const wchar_t *);
     93 
     94 /*
     95  * Drive the whole formatter by managing input files.  Also,
     96  * cause initialization of the output stuff and flush it out
     97  * at the end.
     98  */
     99 
    100 int
    101 main(int argc, char **argv)
    102 {
    103 	FILE *fi;
    104 	int errs = 0;
    105 	int compat = 1;
    106 	int c;
    107 
    108 	goal_length = GOAL_LENGTH;
    109 	max_length = MAX_LENGTH;
    110 	buf_init(&outbuf);
    111 	lineno = 1;
    112 	mark = -10;
    113 
    114 	setprogname(*argv);
    115 	(void)setlocale(LC_ALL, "");
    116 
    117 	while ((c = getopt(argc, argv, "Cg:m:rw:")) != -1)
    118 		switch (c) {
    119 		case 'C':
    120 			center++;
    121 			break;
    122 		case 'g':
    123 			(void)getnum(optarg, "goal", &goal_length, 1);
    124 			compat = 0;
    125 			break;
    126 		case 'm':
    127 		case 'w':
    128 			(void)getnum(optarg, "max", &max_length, 1);
    129 			compat = 0;
    130 			break;
    131 		case 'r':
    132 			raw++;
    133 			break;
    134 		default:
    135 			usage();
    136 		}
    137 
    138 	argc -= optind;
    139 	argv += optind;
    140 
    141 	/*
    142 	 * compatibility with old usage.
    143 	 */
    144 	if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
    145 		argv++;
    146 		argc--;
    147 		if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
    148 			argv++;
    149 			argc--;
    150 		}
    151 	}
    152 
    153 	if (max_length <= goal_length) {
    154 		errx(1, "Max length (%zu) must be greater than goal "
    155 		    "length (%zu)", max_length, goal_length);
    156 	}
    157 	if (argc == 0) {
    158 		fmt(stdin);
    159 		oflush();
    160 		return 0;
    161 	}
    162 	for (;argc; argc--, argv++) {
    163 		if ((fi = fopen(*argv, "r")) == NULL) {
    164 			warn("Cannot open `%s'", *argv);
    165 			errs++;
    166 			continue;
    167 		}
    168 		fmt(fi);
    169 		(void)fclose(fi);
    170 	}
    171 	oflush();
    172 	buf_end(&outbuf);
    173 	return errs;
    174 }
    175 
/*
 * Print a synopsis of both accepted command line forms on standard
 * error and exit with status 1.  Never returns.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m|w <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
    185 
/*
 * Parse str as a non-negative length and store it in *res.
 *
 *	str	text to parse; base is auto-detected (0x.. hex, 0.. octal)
 *	what	name of the quantity, used in diagnostics only
 *	res	receives the value on success, untouched otherwise
 *	badnum	if nonzero, text that is not a number is a fatal error;
 *		if zero, it just makes the function return 0 (used to
 *		tell old-style numeric operands from file names)
 *
 * Returns 1 if a number was parsed, 0 if str is not a number and
 * badnum is zero.  Exits with status 1 if the number does not fit in a
 * size_t, or if str is not a number and badnum is nonzero.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	unsigned long ul;
	char *ep;

	errno = 0;
	ul = strtoul(str, &ep, 0);

	/*
	 * The whole string must have been consumed.  strtoul() also
	 * accepts a leading minus sign and negates the result in
	 * unsigned arithmetic, which would turn "-1" into an enormous
	 * length; once the whole string is known to be a number, any
	 * '-' in it can only be that sign, so reject it here.
	 */
	if (*str == '\0' || *ep != '\0' || strchr(str, '-') != NULL) {
		if (badnum)
			errx(1, "Bad %s number `%s'", what, str);
		return 0;
	}

	/* Out of range for unsigned long, or does not survive size_t. */
	if ((errno == ERANGE && ul == ULONG_MAX) ||
	    (unsigned long)(size_t)ul != ul)
		errx(1, "%s number `%s' too big", what, str);

	*res = (size_t)ul;
	return 1;
}
    204 
    205 /*
    206  * Read up characters from the passed input file, forming lines,
    207  * doing ^H processing, expanding tabs, stripping trailing blanks,
    208  * and sending each line down for analysis.
    209  */
    210 static void
    211 fmt(FILE *fi)
    212 {
    213 	struct buffer lbuf, cbuf;
    214 	wchar_t *cp, *cp2;
    215 	wint_t c;
    216 	int add_space;
    217 	size_t len, col, i;
    218 
    219 	if (center) {
    220 		for (;;) {
    221 			cp = fgetwln(fi, &len);
    222 			if (!cp)
    223 				return;
    224 
    225 			/* skip over leading space */
    226 			while (len > 0) {
    227 				if (!iswspace(*cp))
    228 					break;
    229 				cp++;
    230 				len--;
    231 			}
    232 
    233 			/* clear trailing space */
    234 			while (len > 0) {
    235 				if (!iswspace((unsigned char)cp[len-1]))
    236 					break;
    237 				len--;
    238 			}
    239 
    240 			if (len == 0) {
    241 				/* blank line */
    242 				(void)putwchar(L'\n');
    243 				continue;
    244 			}
    245 
    246 			if (goal_length > len) {
    247 				for (i = 0; i < (goal_length - len) / 2; i++) {
    248 					(void)putwchar(L' ');
    249 				}
    250 			}
    251 			for (i = 0; i < len; i++) {
    252 				(void)putwchar(cp[i]);
    253 			}
    254 			(void)putwchar(L'\n');
    255 		}
    256 	}
    257 
    258 	buf_init(&lbuf);
    259 	buf_init(&cbuf);
    260 	c = getwc(fi);
    261 
    262 	while (c != WEOF) {
    263 		/*
    264 		 * Collect a line, doing ^H processing.
    265 		 * Leave tabs for now.
    266 		 */
    267 		buf_reset(&lbuf);
    268 		while (c != '\n' && c != WEOF) {
    269 			if (c == '\b') {
    270 				(void)buf_unputc(&lbuf);
    271 				c = getwc(fi);
    272 				continue;
    273 			}
    274 			if(!(iswprint(c) || c == '\t' || c >= 160)) {
    275 				c = getwc(fi);
    276 				continue;
    277 			}
    278 			buf_putc(&lbuf, c);
    279 			c = getwc(fi);
    280 		}
    281 		buf_putc(&lbuf, '\0');
    282 		(void)buf_unputc(&lbuf);
    283 		add_space = c != WEOF;
    284 
    285 		/*
    286 		 * Expand tabs on the way.
    287 		 */
    288 		col = 0;
    289 		cp = lbuf.bptr;
    290 		buf_reset(&cbuf);
    291 		while ((c = *cp++) != '\0') {
    292 			if (c != '\t') {
    293 				col++;
    294 				buf_putc(&cbuf, c);
    295 				continue;
    296 			}
    297 			do {
    298 				buf_putc(&cbuf, ' ');
    299 				col++;
    300 			} while ((col & 07) != 0);
    301 		}
    302 
    303 		/*
    304 		 * Swipe trailing blanks from the line.
    305 		 */
    306 		for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
    307 			continue;
    308 		cbuf.ptr = cp2 + 1;
    309 		buf_putc(&cbuf, '\0');
    310 		(void)buf_unputc(&cbuf);
    311 		prefix(&cbuf, add_space);
    312 		if (c != WEOF)
    313 			c = getwc(fi);
    314 	}
    315 	buf_end(&cbuf);
    316 	buf_end(&lbuf);
    317 }
    318 
    319 /*
    320  * Take a line devoid of tabs and other garbage and determine its
    321  * blank prefix.  If the indent changes, call for a linebreak.
    322  * If the input line is blank, echo the blank line on the output.
    323  * Finally, if the line minus the prefix is a mail header, try to keep
    324  * it on a line by itself.
    325  */
    326 static void
    327 prefix(const struct buffer *buf, int add_space)
    328 {
    329 	const wchar_t *cp;
    330 	const wchar_t **hp;
    331 	size_t np;
    332 	int h;
    333 
    334 	if (buf->ptr == buf->bptr) {
    335 		oflush();
    336 		(void)putwchar(L'\n');
    337 		return;
    338 	}
    339 	for (cp = buf->bptr; *cp == ' '; cp++)
    340 		continue;
    341 	np = cp - buf->bptr;
    342 
    343 	/*
    344 	 * The following horrible expression attempts to avoid linebreaks
    345 	 * when the indent changes due to a paragraph.
    346 	 */
    347 	if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
    348 		oflush();
    349 	if (!raw) {
    350 		if ((h = ishead(cp)) != 0) {
    351 			oflush();
    352 			mark = lineno;
    353 		}
    354 		if (lineno - mark < 3 && lineno - mark > 0)
    355 			for (hp = &headnames[0]; *hp != NULL; hp++)
    356 				if (ispref(*hp, cp)) {
    357 					h = 1;
    358 					oflush();
    359 					break;
    360 				}
    361 		if (!h && (h = (*cp == '.')))
    362 			oflush();
    363 	} else
    364 		h = 0;
    365 	pfx = np;
    366 	if (h) {
    367 		pack(cp, (size_t)(buf->ptr - cp));
    368 		oflush();
    369 	} else
    370 		split(cp, add_space);
    371 	lineno++;
    372 }
    373 
    374 /*
    375  * Split up the passed line into output "words" which are
    376  * maximal strings of non-blanks with the blank separation
    377  * attached at the end.  Pass these words along to the output
    378  * line packer.
    379  */
    380 static void
    381 split(const wchar_t line[], int add_space)
    382 {
    383 	const wchar_t *cp;
    384 	struct buffer word;
    385 	size_t wlen;
    386 
    387 	buf_init(&word);
    388 	cp = line;
    389 	while (*cp) {
    390 		buf_reset(&word);
    391 		wlen = 0;
    392 
    393 		/*
    394 		 * Collect a 'word,' allowing it to contain escaped white
    395 		 * space.
    396 		 */
    397 		while (*cp && *cp != ' ') {
    398 			if (*cp == '\\' && iswspace(cp[1]))
    399 				buf_putc(&word, *cp++);
    400 			buf_putc(&word, *cp++);
    401 			wlen++;
    402 		}
    403 
    404 		/*
    405 		 * Guarantee a space at end of line. Two spaces after end of
    406 		 * sentence punctuation.
    407 		 */
    408 		if (*cp == '\0' && add_space) {
    409 			buf_putc(&word, ' ');
    410 			if (strchr(".:!", cp[-1]))
    411 				buf_putc(&word, ' ');
    412 		}
    413 		while (*cp == ' ')
    414 			buf_putc(&word, *cp++);
    415 
    416 		buf_putc(&word, '\0');
    417 		(void)buf_unputc(&word);
    418 
    419 		pack(word.bptr, wlen);
    420 	}
    421 	buf_end(&word);
    422 }
    423 
    424 /*
    425  * Output section.
    426  * Build up line images from the words passed in.  Prefix
    427  * each line with correct number of blanks.
    428  *
    429  * At the bottom of this whole mess, leading tabs are reinserted.
    430  */
    431 
    432 /*
    433  * Pack a word onto the output line.  If this is the beginning of
    434  * the line, push on the appropriately-sized string of blanks first.
    435  * If the word won't fit on the current line, flush and begin a new
    436  * line.  If the word is too long to fit all by itself on a line,
    437  * just give it its own and hope for the best.
    438  *
    439  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
    440  *	goal length, take it.  If not, then check to see if the line
    441  *	will be over the max length; if so put the word on the next
    442  *	line.  If not, check to see if the line will be closer to the
    443  *	goal length with or without the word and take it or put it on
    444  *	the next line accordingly.
    445  */
    446 
    447 static void
    448 pack(const wchar_t *word, size_t wlen)
    449 {
    450 	const wchar_t *cp;
    451 	size_t s, t;
    452 
    453 	if (outbuf.bptr == outbuf.ptr)
    454 		leadin();
    455 	/*
    456 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
    457 	 * length of the line before the word is added; t is now the length
    458 	 * of the line after the word is added
    459 	 */
    460 	s = outbuf.ptr - outbuf.bptr;
    461 	t = wlen + s;
    462 	if ((t <= goal_length) || ((t <= max_length) &&
    463 	    (s <= goal_length) && (t - goal_length <= goal_length - s))) {
    464 		/*
    465 		 * In like flint!
    466 		 */
    467 		for (cp = word; *cp;)
    468 			buf_putc(&outbuf, *cp++);
    469 		return;
    470 	}
    471 	if (s > pfx) {
    472 		oflush();
    473 		leadin();
    474 	}
    475 	for (cp = word; *cp;)
    476 		buf_putc(&outbuf, *cp++);
    477 }
    478 
    479 /*
    480  * If there is anything on the current output line, send it on
    481  * its way.  Reset outbuf.
    482  */
    483 static void
    484 oflush(void)
    485 {
    486 	if (outbuf.bptr == outbuf.ptr)
    487 		return;
    488 	buf_putc(&outbuf, '\0');
    489 	(void)buf_unputc(&outbuf);
    490 	tabulate(&outbuf);
    491 	buf_reset(&outbuf);
    492 }
    493 
    494 /*
    495  * Take the passed line buffer, insert leading tabs where possible, and
    496  * output on standard output (finally).
    497  */
    498 static void
    499 tabulate(struct buffer *buf)
    500 {
    501 	wchar_t *cp;
    502 	size_t b, t;
    503 
    504 	/*
    505 	 * Toss trailing blanks in the output line.
    506 	 */
    507 	for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
    508 		continue;
    509 	*++cp = '\0';
    510 
    511 	/*
    512 	 * Count the leading blank space and tabulate.
    513 	 */
    514 	for (cp = buf->bptr; *cp == ' '; cp++)
    515 		continue;
    516 	b = cp - buf->bptr;
    517 	t = b / 8;
    518 	b = b % 8;
    519 	if (t > 0)
    520 		do
    521 			(void)putwchar(L'\t');
    522 		while (--t);
    523 	if (b > 0)
    524 		do
    525 			(void)putwchar(L' ');
    526 		while (--b);
    527 	while (*cp)
    528 		(void)putwchar(*cp++);
    529 	(void)putwchar(L'\n');
    530 }
    531 
    532 /*
    533  * Initialize the output line with the appropriate number of
    534  * leading blanks.
    535  */
    536 static void
    537 leadin(void)
    538 {
    539 	size_t b;
    540 
    541 	buf_reset(&outbuf);
    542 
    543 	for (b = 0; b < pfx; b++)
    544 		buf_putc(&outbuf, ' ');
    545 }
    546 
/*
 * Is s1 a prefix of s2?
 *
 * Both arguments are NUL-terminated wide strings.  Returns 1 if every
 * character of s1 matches the character at the same position in s2,
 * 0 otherwise.  The empty string is a prefix of every string.
 *
 * Both pointers advance together and the scan stops at the first
 * mismatch or at the end of s1, so neither string is read past its
 * terminator.
 */
static int
ispref(const wchar_t *s1, const wchar_t *s2)
{

	while (*s1 != L'\0') {
		if (*s1++ != *s2++)
			return 0;
	}
	return 1;
}
    558