Home | History | Annotate | Line # | Download | only in fmt
fmt.c revision 1.30
      1 /*	$NetBSD: fmt.c,v 1.30 2008/04/13 03:46:30 dholland Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #ifndef lint
     34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
     35 	The Regents of the University of California.  All rights reserved.\n");
     36 #endif /* not lint */
     37 
     38 #ifndef lint
     39 #if 0
     40 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
     41 #endif
     42 __RCSID("$NetBSD: fmt.c,v 1.30 2008/04/13 03:46:30 dholland Exp $");
     43 #endif /* not lint */
     44 
     45 #include <ctype.h>
     46 #include <locale.h>
     47 #include <stdio.h>
     48 #include <stdlib.h>
     49 #include <unistd.h>
     50 #include <errno.h>
     51 #include <err.h>
     52 #include <limits.h>
     53 #include <string.h>
     54 #include "buffer.h"
     55 
/*
 * fmt -- format the concatenation of input files or standard input
 * onto standard output.  Designed for use with Mail ~|
 *
 * Syntax : fmt [-Cr] [-g goal] [-m max] [name ...]
 *          fmt [-Cr] [goal [max]] [name ...]	(historical form)
 * Authors: Kurt Shoens (UCB) 12/7/78;
 *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
 */
     64 
     65 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
     66 #define GOAL_LENGTH 65
     67 #define MAX_LENGTH 75
     68 static size_t	goal_length;	/* Target or goal line length in output */
     69 static size_t	max_length;	/* Max line length in output */
     70 static size_t	pfx;		/* Current leading blank count */
     71 static int	raw;		/* Don't treat mail specially */
     72 static int	lineno;		/* Current input line */
     73 static int	mark;		/* Last place we saw a head line */
     74 static int	center;
     75 static struct buffer outbuf;
     76 
     77 static const char	*headnames[] = {"To", "Subject", "Cc", 0};
     78 
     79 static void	usage(void) __dead;
     80 static int 	getnum(const char *, const char *, size_t *, int);
     81 static void	fmt(FILE *);
     82 static int	ispref(const char *, const char *);
     83 static void	leadin(void);
     84 static void	oflush(void);
     85 static void	pack(const char *, size_t);
     86 static void	prefix(const struct buffer *, int);
     87 static void	split(const char *, int);
     88 static void	tabulate(struct buffer *);
     89 
     90 
     91 int		ishead(const char *);
     92 
     93 /*
     94  * Drive the whole formatter by managing input files.  Also,
     95  * cause initialization of the output stuff and flush it out
     96  * at the end.
     97  */
     98 
     99 int
    100 main(int argc, char **argv)
    101 {
    102 	FILE *fi;
    103 	int errs = 0;
    104 	int compat = 1;
    105 	int c;
    106 
    107 	goal_length = GOAL_LENGTH;
    108 	max_length = MAX_LENGTH;
    109 	buf_init(&outbuf);
    110 	lineno = 1;
    111 	mark = -10;
    112 
    113 	setprogname(*argv);
    114 	(void)setlocale(LC_ALL, "");
    115 
    116 	while ((c = getopt(argc, argv, "Cg:m:r")) != -1)
    117 		switch (c) {
    118 		case 'C':
    119 			center++;
    120 			break;
    121 		case 'g':
    122 			(void)getnum(optarg, "goal", &goal_length, 1);
    123 			compat = 0;
    124 			break;
    125 		case 'm':
    126 			(void)getnum(optarg, "max", &max_length, 1);
    127 			compat = 0;
    128 			break;
    129 		case 'r':
    130 			raw++;
    131 			break;
    132 		default:
    133 			usage();
    134 		}
    135 
    136 	argc -= optind;
    137 	argv += optind;
    138 
    139 	/*
    140 	 * compatibility with old usage.
    141 	 */
    142 	if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
    143 		argv++;
    144 		argc--;
    145 		if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
    146 			argv++;
    147 			argc--;
    148 		}
    149 	}
    150 
    151 	if (max_length <= goal_length) {
    152 		errx(1, "Max length (%zu) must be greater than goal "
    153 		    "length (%zu)", max_length, goal_length);
    154 	}
    155 	if (argc == 0) {
    156 		fmt(stdin);
    157 		oflush();
    158 		return 0;
    159 	}
    160 	for (;argc; argc--, argv++) {
    161 		if ((fi = fopen(*argv, "r")) == NULL) {
    162 			warn("Cannot open `%s'", *argv);
    163 			errs++;
    164 			continue;
    165 		}
    166 		fmt(fi);
    167 		(void)fclose(fi);
    168 	}
    169 	oflush();
    170 	buf_end(&outbuf);
    171 	return errs;
    172 }
    173 
/*
 * Print the two accepted command line forms on standard error and
 * terminate with exit status 1.  Never returns.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
    183 
/*
 * Parse `str' as a non-negative length (base auto-detected by
 * strtoul(), so "0x.." and leading-0 octal are accepted).
 *
 *   str     text to parse
 *   what    name of the quantity, used only in diagnostics
 *   res     where the value is stored on success; untouched otherwise
 *   badnum  non-zero: a malformed number is fatal (errx);
 *           zero: a malformed number just yields a 0 return
 *
 * Returns 1 if `str' was a complete valid number, 0 if not (and
 * badnum is zero).  A well-formed number that does not fit in a
 * size_t is always fatal.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	const char *sign;
	unsigned long ul;
	char *ep;

	/*
	 * strtoul() accepts a leading '-' and returns the negated value,
	 * which would turn "-1" into an enormous length.  Locate the first
	 * non-blank character ourselves so such input can be rejected.
	 */
	for (sign = str; isspace((unsigned char)*sign); sign++)
		continue;

	errno = 0;
	ul = strtoul(str, &ep, 0);
	if (*sign != '-' && ep != str && *ep == '\0') {
		/* Out of range for strtoul(), or does not survive size_t. */
		if (errno == ERANGE || (unsigned long)(size_t)ul != ul)
			errx(1, "%s number `%s' too big", what, str);
		*res = (size_t)ul;
		return 1;
	}
	if (badnum)
		errx(1, "Bad %s number `%s'", what, str);

	return 0;
}
    202 
    203 /*
    204  * Read up characters from the passed input file, forming lines,
    205  * doing ^H processing, expanding tabs, stripping trailing blanks,
    206  * and sending each line down for analysis.
    207  */
    208 static void
    209 fmt(FILE *fi)
    210 {
    211 	struct buffer lbuf, cbuf;
    212 	char *cp, *cp2;
    213 	int c, add_space;
    214 	size_t len, col, i;
    215 
    216 	if (center) {
    217 		for (;;) {
    218 			cp = fgetln(fi, &len);
    219 			if (!cp)
    220 				return;
    221 
    222 			/* skip over leading space */
    223 			while (len > 0) {
    224 				if (!isspace((unsigned char)*cp))
    225 					break;
    226 				cp++;
    227 				len--;
    228 			}
    229 
    230 			/* clear trailing space */
    231 			while (len > 0) {
    232 				if (!isspace((unsigned char)cp[len-1]))
    233 					break;
    234 				len--;
    235 			}
    236 
    237 			if (len == 0) {
    238 				/* blank line */
    239 				(void)putchar('\n');
    240 				continue;
    241 			}
    242 
    243 			if (goal_length > len) {
    244 				for (i = 0; i < (goal_length - len) / 2; i++) {
    245 					(void)putchar(' ');
    246 				}
    247 			}
    248 			for (i = 0; i < len; i++) {
    249 				(void)putchar(cp[i]);
    250 			}
    251 			(void)putchar('\n');
    252 		}
    253 	}
    254 
    255 	buf_init(&lbuf);
    256 	buf_init(&cbuf);
    257 	c = getc(fi);
    258 
    259 	while (c != EOF) {
    260 		/*
    261 		 * Collect a line, doing ^H processing.
    262 		 * Leave tabs for now.
    263 		 */
    264 		buf_reset(&lbuf);
    265 		while (c != '\n' && c != EOF) {
    266 			if (c == '\b') {
    267 				(void)buf_unputc(&lbuf);
    268 				c = getc(fi);
    269 				continue;
    270 			}
    271 			if(!(isprint(c) || c == '\t' || c >= 160)) {
    272 				c = getc(fi);
    273 				continue;
    274 			}
    275 			buf_putc(&lbuf, c);
    276 			c = getc(fi);
    277 		}
    278 		buf_putc(&lbuf, '\0');
    279 		(void)buf_unputc(&lbuf);
    280 		add_space = c != EOF;
    281 
    282 		/*
    283 		 * Expand tabs on the way.
    284 		 */
    285 		col = 0;
    286 		cp = lbuf.bptr;
    287 		buf_reset(&cbuf);
    288 		while ((c = *cp++) != '\0') {
    289 			if (c != '\t') {
    290 				col++;
    291 				buf_putc(&cbuf, c);
    292 				continue;
    293 			}
    294 			do {
    295 				buf_putc(&cbuf, ' ');
    296 				col++;
    297 			} while ((col & 07) != 0);
    298 		}
    299 
    300 		/*
    301 		 * Swipe trailing blanks from the line.
    302 		 */
    303 		for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
    304 			continue;
    305 		cbuf.ptr = cp2 + 1;
    306 		buf_putc(&cbuf, '\0');
    307 		(void)buf_unputc(&cbuf);
    308 		prefix(&cbuf, add_space);
    309 		if (c != EOF)
    310 			c = getc(fi);
    311 	}
    312 	buf_end(&cbuf);
    313 	buf_end(&lbuf);
    314 }
    315 
    316 /*
    317  * Take a line devoid of tabs and other garbage and determine its
    318  * blank prefix.  If the indent changes, call for a linebreak.
    319  * If the input line is blank, echo the blank line on the output.
    320  * Finally, if the line minus the prefix is a mail header, try to keep
    321  * it on a line by itself.
    322  */
    323 static void
    324 prefix(const struct buffer *buf, int add_space)
    325 {
    326 	const char *cp;
    327 	const char **hp;
    328 	size_t np;
    329 	int h;
    330 
    331 	if (buf->ptr == buf->bptr) {
    332 		oflush();
    333 		(void)putchar('\n');
    334 		return;
    335 	}
    336 	for (cp = buf->bptr; *cp == ' '; cp++)
    337 		continue;
    338 	np = cp - buf->bptr;
    339 
    340 	/*
    341 	 * The following horrible expression attempts to avoid linebreaks
    342 	 * when the indent changes due to a paragraph.
    343 	 */
    344 	if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
    345 		oflush();
    346 	if (!raw) {
    347 		if ((h = ishead(cp)) != 0) {
    348 			oflush();
    349 			mark = lineno;
    350 		}
    351 		if (lineno - mark < 3 && lineno - mark > 0)
    352 			for (hp = &headnames[0]; *hp != NULL; hp++)
    353 				if (ispref(*hp, cp)) {
    354 					h = 1;
    355 					oflush();
    356 					break;
    357 				}
    358 		if (!h && (h = (*cp == '.')))
    359 			oflush();
    360 	} else
    361 		h = 0;
    362 	pfx = np;
    363 	if (h) {
    364 		pack(cp, (size_t)(buf->ptr - cp));
    365 		oflush();
    366 	} else
    367 		split(cp, add_space);
    368 	lineno++;
    369 }
    370 
    371 /*
    372  * Split up the passed line into output "words" which are
    373  * maximal strings of non-blanks with the blank separation
    374  * attached at the end.  Pass these words along to the output
    375  * line packer.
    376  */
    377 static void
    378 split(const char line[], int add_space)
    379 {
    380 	const char *cp;
    381 	struct buffer word;
    382 	size_t wlen;
    383 
    384 	buf_init(&word);
    385 	cp = line;
    386 	while (*cp) {
    387 		buf_reset(&word);
    388 		wlen = 0;
    389 
    390 		/*
    391 		 * Collect a 'word,' allowing it to contain escaped white
    392 		 * space.
    393 		 */
    394 		while (*cp && *cp != ' ') {
    395 			if (*cp == '\\' && isspace((unsigned char)cp[1]))
    396 				buf_putc(&word, *cp++);
    397 			buf_putc(&word, *cp++);
    398 			wlen++;
    399 		}
    400 
    401 		/*
    402 		 * Guarantee a space at end of line. Two spaces after end of
    403 		 * sentence punctuation.
    404 		 */
    405 		if (*cp == '\0' && add_space) {
    406 			buf_putc(&word, ' ');
    407 			if (strchr(".:!", cp[-1]))
    408 				buf_putc(&word, ' ');
    409 		}
    410 		while (*cp == ' ')
    411 			buf_putc(&word, *cp++);
    412 
    413 		buf_putc(&word, '\0');
    414 		(void)buf_unputc(&word);
    415 
    416 		pack(word.bptr, wlen);
    417 	}
    418 	buf_end(&word);
    419 }
    420 
    421 /*
    422  * Output section.
    423  * Build up line images from the words passed in.  Prefix
    424  * each line with correct number of blanks.
    425  *
    426  * At the bottom of this whole mess, leading tabs are reinserted.
    427  */
    428 
    429 /*
    430  * Pack a word onto the output line.  If this is the beginning of
    431  * the line, push on the appropriately-sized string of blanks first.
    432  * If the word won't fit on the current line, flush and begin a new
    433  * line.  If the word is too long to fit all by itself on a line,
    434  * just give it its own and hope for the best.
    435  *
    436  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
    437  *	goal length, take it.  If not, then check to see if the line
    438  *	will be over the max length; if so put the word on the next
    439  *	line.  If not, check to see if the line will be closer to the
    440  *	goal length with or without the word and take it or put it on
    441  *	the next line accordingly.
    442  */
    443 
    444 static void
    445 pack(const char *word, size_t wlen)
    446 {
    447 	const char *cp;
    448 	size_t s, t;
    449 
    450 	if (outbuf.bptr == outbuf.ptr)
    451 		leadin();
    452 	/*
    453 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
    454 	 * length of the line before the word is added; t is now the length
    455 	 * of the line after the word is added
    456 	 */
    457 	s = outbuf.ptr - outbuf.bptr;
    458 	t = wlen + s;
    459 	if ((t <= goal_length) || ((t <= max_length) &&
    460 	    (s <= goal_length) && (t - goal_length <= goal_length - s))) {
    461 		/*
    462 		 * In like flint!
    463 		 */
    464 		for (cp = word; *cp;)
    465 			buf_putc(&outbuf, *cp++);
    466 		return;
    467 	}
    468 	if (s > pfx) {
    469 		oflush();
    470 		leadin();
    471 	}
    472 	for (cp = word; *cp;)
    473 		buf_putc(&outbuf, *cp++);
    474 }
    475 
    476 /*
    477  * If there is anything on the current output line, send it on
    478  * its way.  Reset outbuf.
    479  */
    480 static void
    481 oflush(void)
    482 {
    483 	if (outbuf.bptr == outbuf.ptr)
    484 		return;
    485 	buf_putc(&outbuf, '\0');
    486 	(void)buf_unputc(&outbuf);
    487 	tabulate(&outbuf);
    488 	buf_reset(&outbuf);
    489 }
    490 
    491 /*
    492  * Take the passed line buffer, insert leading tabs where possible, and
    493  * output on standard output (finally).
    494  */
    495 static void
    496 tabulate(struct buffer *buf)
    497 {
    498 	char *cp;
    499 	size_t b, t;
    500 
    501 	/*
    502 	 * Toss trailing blanks in the output line.
    503 	 */
    504 	for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
    505 		continue;
    506 	*++cp = '\0';
    507 
    508 	/*
    509 	 * Count the leading blank space and tabulate.
    510 	 */
    511 	for (cp = buf->bptr; *cp == ' '; cp++)
    512 		continue;
    513 	b = cp - buf->bptr;
    514 	t = b / 8;
    515 	b = b % 8;
    516 	if (t > 0)
    517 		do
    518 			(void)putchar('\t');
    519 		while (--t);
    520 	if (b > 0)
    521 		do
    522 			(void)putchar(' ');
    523 		while (--b);
    524 	while (*cp)
    525 		(void)putchar(*cp++);
    526 	(void)putchar('\n');
    527 }
    528 
    529 /*
    530  * Initialize the output line with the appropriate number of
    531  * leading blanks.
    532  */
    533 static void
    534 leadin(void)
    535 {
    536 	size_t b;
    537 
    538 	buf_reset(&outbuf);
    539 
    540 	for (b = 0; b < pfx; b++)
    541 		buf_putc(&outbuf, ' ');
    542 }
    543 
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 if every character of s1 matches the character at the
 * same position in s2, 0 otherwise.  The empty string is a prefix of
 * everything.  Neither string is read beyond its terminating NUL: if
 * s2 is the shorter one, its NUL mismatches and ends the scan.
 */
static int
ispref(const char *s1, const char *s2)
{

	for (; *s1 != '\0'; s1++, s2++)
		if (*s1 != *s2)
			return 0;
	return 1;
}
    555