Home | History | Annotate | Line # | Download | only in fmt
fmt.c revision 1.12
      1 /*	$NetBSD: fmt.c,v 1.12 2000/09/15 11:23:17 abs Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 #ifndef lint
     38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
     39 	The Regents of the University of California.  All rights reserved.\n");
     40 #endif /* not lint */
     41 
     42 #ifndef lint
     43 #if 0
     44 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
     45 #endif
     46 __RCSID("$NetBSD: fmt.c,v 1.12 2000/09/15 11:23:17 abs Exp $");
     47 #endif /* not lint */
     48 
     49 #include <stdio.h>
     50 #include <stdlib.h>
     51 #include <string.h>
     52 #include <ctype.h>
     53 #include <locale.h>
     54 
     55 /*
     56  * fmt -- format the concatenation of input files or standard input
     57  * onto standard output.  Designed for use with Mail ~|
     58  *
 * Syntax : fmt [ -c ] [ goal [ max ] ] [ name ... ]
     60  * Authors: Kurt Shoens (UCB) 12/7/78;
     61  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
     62  */
     63 
     64 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
     65  * #define	LENGTH	72		Max line length in output
     66  */
     67 #define	NOSTR	((char *) 0)	/* Null string pointer for lint */
     68 
/*
 * Line-length defaults (LIZ@UOM 6/18/85) followed by the state shared
 * by the input and output halves of the formatter.
 */
#define GOAL_LENGTH 65		/* default value of goal_length */
#define MAX_LENGTH 75		/* default value of max_length */

int	goal_length;		/* Preferred length of an output line */
int	max_length;		/* Longest output line pack() will build */
int	pfx;			/* Leading blank count of the current line */
int	lineno;			/* Number of the input line being handled */
int	mark;			/* lineno of the most recent head line */
int	center;			/* Nonzero when -c was given */

/* Header names that prefix() keeps on a line of their own; NULL ends it. */
char	*headnames[] = { "To", "Subject", "Cc", NULL };
     80 
/*
 * Functions with external linkage.  ishead() is only declared here; its
 * definition is not in the visible part of this file.
 */
int	main __P((int, char **));
int	ishead __P((const char *));

/* Helpers private to this file. */
static void	fmt __P((FILE *));
static int	ispref __P((const char *, const char *));
static void	leadin __P((void));
static void	oflush __P((void));
static void	pack __P((const char *, int));
static void	prefix __P((const char *, int));
static void	setout __P((void));
static void	split __P((const char *, int));
static void	tabulate __P((char *));
     93 
     94 /*
     95  * Drive the whole formatter by managing input files.  Also,
     96  * cause initialization of the output stuff and flush it out
     97  * at the end.
     98  */
     99 
    100 int
    101 main(argc, argv)
    102 	int argc;
    103 	char **argv;
    104 {
    105 	FILE *fi;
    106 	int errs = 0;
    107 	int number;		/* LIZ@UOM 6/18/85 */
    108 
    109 	goal_length = GOAL_LENGTH;
    110 	max_length = MAX_LENGTH;
    111 	setout();
    112 	lineno = 1;
    113 	mark = -10;
    114 
    115 	setlocale(LC_ALL, "");
    116 
    117 	/*
    118 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
    119 	 */
    120 	if (argc > 1 && !strcmp(argv[1], "-c")) {
    121 		center++;
    122 		argc--;
    123 		argv++;
    124 	}
    125 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
    126 		argv++;
    127 		argc--;
    128 		goal_length = abs(number);
    129 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
    130 			argv++;
    131 			argc--;
    132 			max_length = abs(number);
    133 		}
    134 	}
    135 	if (max_length <= goal_length) {
    136 		fprintf(stderr, "Max length must be greater than %s\n",
    137 			"goal length");
    138 		exit(1);
    139 	}
    140 	if (argc < 2) {
    141 		fmt(stdin);
    142 		oflush();
    143 		exit(0);
    144 	}
    145 	while (--argc) {
    146 		if ((fi = fopen(*++argv, "r")) == NULL) {
    147 			perror(*argv);
    148 			errs++;
    149 			continue;
    150 		}
    151 		fmt(fi);
    152 		fclose(fi);
    153 	}
    154 	oflush();
    155 	exit(errs);
    156 }
    157 
    158 /*
    159  * Read up characters from the passed input file, forming lines,
    160  * doing ^H processing, expanding tabs, stripping trailing blanks,
    161  * and sending each line down for analysis.
    162  */
    163 static void
    164 fmt(fi)
    165 	FILE *fi;
    166 {
    167 	char linebuf[BUFSIZ], canonb[BUFSIZ];
    168 	char *cp, *cp2;
    169 	int c, col, add_space;
    170 
    171 	if (center) {
    172 		while (1) {
    173 			cp = fgets(linebuf, BUFSIZ, fi);
    174 			if (!cp)
    175 				return;
    176 			while (*cp && isspace(*cp))
    177 				cp++;
    178 			cp2 = cp + strlen(cp) - 1;
    179 			while (cp2 > cp && isspace(*cp2))
    180 				cp2--;
    181 			if (cp == cp2)
    182 				putchar('\n');
    183 			col = cp2 - cp;
    184 			for (c = 0; c < (goal_length-col)/2; c++)
    185 				putchar(' ');
    186 			while (cp <= cp2)
    187 				putchar(*cp++);
    188 			putchar('\n');
    189 		}
    190 	}
    191 	c = getc(fi);
    192 	while (c != EOF) {
    193 		/*
    194 		 * Collect a line, doing ^H processing.
    195 		 * Leave tabs for now.
    196 		 */
    197 		cp = linebuf;
    198 		while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
    199 			if (c == '\b') {
    200 				if (cp > linebuf)
    201 					cp--;
    202 				c = getc(fi);
    203 				continue;
    204 			}
    205 			if(!(isprint(c) || c == '\t')) {
    206 				c = getc(fi);
    207 				continue;
    208 			}
    209 			*cp++ = c;
    210 			c = getc(fi);
    211 		}
    212 		*cp = '\0';
    213 
    214 		/*
    215 		 * By default, add space after the end of current input
    216 		 * (normally end of line)
    217 		 */
    218 		add_space = 1;
    219 
    220 		/*
    221 		 * If the input line is longer than linebuf buffer can hold,
    222 		 * process the data read so far as if it was a separate line -
    223 		 * if there is any whitespace character in the read data,
    224 		 * process all the data up to it, otherwise process all.
    225 		 */
    226 		if (c != '\n' && c != EOF && !isspace(c)) {
    227 			/*
    228 			 * Find out if any whitespace character has been read.
    229 			 */
    230 			for(cp2 = cp; cp2 >= linebuf
    231 				&& !isspace((unsigned char)*cp2); cp2--);
    232 
    233 			if (cp2 < linebuf) {
    234 				/*
    235 				 * ungetc() last read character so that it
    236 				 * won't get lost.
    237 				 */
    238 				ungetc(c, fi);
    239 				/*
    240 				 * Don't append space on the end in split().
    241 				 */
    242 				add_space = 0;
    243 			} else {
    244 				/*
    245 				 * To avoid splitting a word in a middle,
    246 				 * ungetc() all characters after last
    247 				 * whitespace char.
    248 				 */
    249 				while (!isspace(c) && (cp >= linebuf)) {
    250 					ungetc(c, fi);
    251 					c = *--cp;
    252 				}
    253 				*cp = '\0';
    254 			}
    255 		}
    256 
    257 		/*
    258 		 * Expand tabs on the way to canonb.
    259 		 */
    260 		col = 0;
    261 		cp = linebuf;
    262 		cp2 = canonb;
    263 		while ((c = *cp++) != 0) {
    264 			if (c != '\t') {
    265 				col++;
    266 				if (cp2-canonb < BUFSIZ-1)
    267 					*cp2++ = c;
    268 				continue;
    269 			}
    270 			do {
    271 				if (cp2-canonb < BUFSIZ-1)
    272 					*cp2++ = ' ';
    273 				col++;
    274 			} while ((col & 07) != 0);
    275 		}
    276 
    277 		/*
    278 		 * Swipe trailing blanks from the line.
    279 		 */
    280 		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
    281 			;
    282 		*++cp2 = '\0';
    283 		prefix(canonb, add_space);
    284 		if (c != EOF)
    285 			c = getc(fi);
    286 	}
    287 }
    288 
    289 /*
    290  * Take a line devoid of tabs and other garbage and determine its
    291  * blank prefix.  If the indent changes, call for a linebreak.
    292  * If the input line is blank, echo the blank line on the output.
    293  * Finally, if the line minus the prefix is a mail header, try to keep
    294  * it on a line by itself.
    295  */
    296 static void
    297 prefix(line, add_space)
    298 	const char line[];
    299 	int add_space;
    300 {
    301 	const char *cp;
    302 	char **hp;
    303 	int np, h;
    304 
    305 	if (strlen(line) == 0) {
    306 		oflush();
    307 		putchar('\n');
    308 		return;
    309 	}
    310 	for (cp = line; *cp == ' '; cp++)
    311 		;
    312 	np = cp - line;
    313 
    314 	/*
    315 	 * The following horrible expression attempts to avoid linebreaks
    316 	 * when the indent changes due to a paragraph.
    317 	 */
    318 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
    319 		oflush();
    320 	if ((h = ishead(cp)) != 0)
    321 		oflush(), mark = lineno;
    322 	if (lineno - mark < 3 && lineno - mark > 0)
    323 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
    324 			if (ispref(*hp, cp)) {
    325 				h = 1;
    326 				oflush();
    327 				break;
    328 			}
    329 	if (!h && (h = (*cp == '.')))
    330 		oflush();
    331 	pfx = np;
    332 	if (h) {
    333 		pack(cp, strlen(cp));
    334 		oflush();
    335 	} else
    336 		split(cp, add_space);
    337 	lineno++;
    338 }
    339 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer, together with the word's length not counting the
 * blanks (a backslash-escaped white space pair counts as one).
 *
 * When add_space is nonzero the last word of the line gets one blank
 * appended, or two if it ends in '.', ':' or '!'.
 *
 * The word buffer has room for a word of BUFSIZ-1 characters plus the
 * two appended blanks; the old BUFSIZ-sized buffer could be overrun by
 * exactly that case.  Should a longer word ever be passed in, it is
 * handed to pack() in pieces instead of overflowing, and a run of
 * blanks too long for the buffer is shortened.
 */
static void
split(line, add_space)
	const char line[];
	int add_space;
{
	const char *cp;
	char *cp2;
	char word[BUFSIZ + 4];
	char *const wend = word + sizeof(word) - 1;	/* slot for the NUL */
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  Each pass may store two characters, and two
		 * blanks may follow, hence the four free slots required.
		 */
		while (*cp && *cp != ' ' && wend - cp2 >= 4) {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  cp[-1] is safe: the line is not
		 * empty, so reaching its end means a character was consumed.
		 */
		if (*cp == '\0' && add_space) {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		for (; *cp == ' '; cp++)
			if (cp2 < wend)
				*cp2++ = ' ';
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
    390 
    391 /*
    392  * Output section.
    393  * Build up line images from the words passed in.  Prefix
    394  * each line with correct number of blanks.  The buffer "outbuf"
    395  * contains the current partial line image, including prefixed blanks.
    396  * "outp" points to the next available space therein.  When outp is NOSTR,
    397  * there ain't nothing in there yet.  At the bottom of this whole mess,
    398  * leading tabs are reinserted.
    399  */
    400 char	outbuf[BUFSIZ];			/* Sandbagged output line image */
    401 char	*outp;				/* Pointer in above */
    402 
    403 /*
    404  * Initialize the output section.
    405  */
    406 static void
    407 setout()
    408 {
    409 	outp = NOSTR;
    410 }
    411 
    412 /*
    413  * Pack a word onto the output line.  If this is the beginning of
    414  * the line, push on the appropriately-sized string of blanks first.
    415  * If the word won't fit on the current line, flush and begin a new
    416  * line.  If the word is too long to fit all by itself on a line,
    417  * just give it its own and hope for the best.
    418  *
    419  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
    420  *	goal length, take it.  If not, then check to see if the line
    421  *	will be over the max length; if so put the word on the next
    422  *	line.  If not, check to see if the line will be closer to the
    423  *	goal length with or without the word and take it or put it on
    424  *	the next line accordingly.
    425  */
    426 
    427 /*
    428  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
    429  * pack(word)
    430  *	char word[];
    431  */
    432 static void
    433 pack(word,wl)
    434 	const char word[];
    435 	int wl;
    436 {
    437 	const char *cp;
    438 	int s, t;
    439 
    440 	if (outp == NOSTR)
    441 		leadin();
    442 	/*
    443 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
    444 	 * length of the line before the word is added; t is now the length
    445 	 * of the line after the word is added
    446 	 *	t = strlen(word);
    447 	 *	if (t+s <= LENGTH)
    448 	 */
    449 	s = outp - outbuf;
    450 	t = wl + s;
    451 	if ((t <= goal_length) ||
    452 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
    453 		/*
    454 		 * In like flint!
    455 		 */
    456 		for (cp = word; *cp; *outp++ = *cp++);
    457 		return;
    458 	}
    459 	if (s > pfx) {
    460 		oflush();
    461 		leadin();
    462 	}
    463 	for (cp = word; *cp; *outp++ = *cp++);
    464 }
    465 
    466 /*
    467  * If there is anything on the current output line, send it on
    468  * its way.  Set outp to NOSTR to indicate the absence of the current
    469  * line prefix.
    470  */
    471 static void
    472 oflush()
    473 {
    474 	if (outp == NOSTR)
    475 		return;
    476 	*outp = '\0';
    477 	tabulate(outbuf);
    478 	outp = NOSTR;
    479 }
    480 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from line in place.  Each full group of
 * eight leading blanks is written as one tab, the remaining leading
 * blanks as blanks, then the rest of the text and a newline.
 */
static void
tabulate(line)
	char line[];
{
	char *cp;
	size_t len;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.  Working with a
	 * length keeps an empty or all-blank line from stepping a
	 * pointer to before the start of the buffer.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
}
    520 
    521 /*
    522  * Initialize the output line with the appropriate number of
    523  * leading blanks.
    524  */
    525 static void
    526 leadin()
    527 {
    528 	int b;
    529 	char *cp;
    530 
    531 	for (b = 0, cp = outbuf; b < pfx; b++)
    532 		*cp++ = ' ';
    533 	outp = cp;
    534 }
    535 
/*
 * Is s1 a prefix of s2??
 *
 * Returns nonzero when every character of s1 matches the character at
 * the same position in s2; the empty string is a prefix of anything.
 * Both pointers advance together.  The earlier loop never advanced s2,
 * so it compared all of s1 against s2[0] only: "Cc" matched any text
 * starting with 'C' and "Subject" could never match.
 */
static int
ispref(s1, s2)
	const char *s1, *s2;
{

	while (*s1 != '\0' && *s1 == *s2) {
		s1++;
		s2++;
	}
	return (*s1 == '\0');
}
    548