Home | History | Annotate | Line # | Download | only in fmt
fmt.c revision 1.17
      1 /*	$NetBSD: fmt.c,v 1.17 2003/08/07 11:13:47 agc Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #ifndef lint
     34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
     35 	The Regents of the University of California.  All rights reserved.\n");
     36 #endif /* not lint */
     37 
     38 #ifndef lint
     39 #if 0
     40 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
     41 #endif
     42 __RCSID("$NetBSD: fmt.c,v 1.17 2003/08/07 11:13:47 agc Exp $");
     43 #endif /* not lint */
     44 
     45 #include <ctype.h>
     46 #include <locale.h>
     47 #include <stdio.h>
     48 #include <stdlib.h>
     49 #include <string.h>
     50 
     51 /*
     52  * fmt -- format the concatenation of input files or standard input
     53  * onto standard output.  Designed for use with Mail ~|
     54  *
 * Syntax : fmt [ -C ] [ goal [ max ] ] [ name ... ]
     56  * Authors: Kurt Shoens (UCB) 12/7/78;
     57  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
     58  */
     59 
     60 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
     61  * #define	LENGTH	72		Max line length in output
     62  */
/*
 * Null string pointer, written as a cast so that lint accepts it
 * wherever a "char *" is expected.
 */
#define	NOSTR	((char *) 0)

/*
 * LIZ@UOM 6/18/85 -- goal_length and max_length.
 * These are the lengths used when none are given on the command line.
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75

int	goal_length;	/* Target (goal) line length in output */
int	max_length;	/* Maximum line length in output */
int	pfx;		/* Leading blank count of the current line */
int	lineno;		/* Number of the current input line */
int	mark;		/* Value of lineno when a head line was last seen */
int	center;		/* Nonzero when -C was given: center every line */

/*
 * Names tested against the start of the lines that closely follow a
 * head line; a matching line is put out on a line of its own.  The list
 * is terminated by a null pointer.
 */
char	*headnames[] = {
	"To",
	"Subject",
	"Cc",
	NOSTR
};
     76 
/* Routines private to this file. */
static void	fmt(FILE *fi);
static int	ispref(const char *s1, const char *s2);
static void	leadin(void);
static void	oflush(void);
static void	pack(const char *word, int wl);
static void	prefix(const char *line, int add_space);
static void	setout(void);
static void	split(const char *line, int add_space);
static void	tabulate(char *line);

/* Routines with external linkage; ishead() is only declared here. */
int	ishead(const char *);
int	main(int argc, char **argv);
     89 
     90 /*
     91  * Drive the whole formatter by managing input files.  Also,
     92  * cause initialization of the output stuff and flush it out
     93  * at the end.
     94  */
     95 
     96 int
     97 main(int argc, char **argv)
     98 {
     99 	FILE *fi;
    100 	int errs = 0;
    101 	int number;		/* LIZ@UOM 6/18/85 */
    102 
    103 	goal_length = GOAL_LENGTH;
    104 	max_length = MAX_LENGTH;
    105 	setout();
    106 	lineno = 1;
    107 	mark = -10;
    108 
    109 	setlocale(LC_ALL, "");
    110 
    111 	/*
    112 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
    113 	 */
    114 	if (argc > 1 && !strcmp(argv[1], "-C")) {
    115 		center++;
    116 		argc--;
    117 		argv++;
    118 	}
    119 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
    120 		argv++;
    121 		argc--;
    122 		goal_length = abs(number);
    123 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
    124 			argv++;
    125 			argc--;
    126 			max_length = abs(number);
    127 		}
    128 	}
    129 	if (max_length <= goal_length) {
    130 		fprintf(stderr, "Max length must be greater than %s\n",
    131 			"goal length");
    132 		exit(1);
    133 	}
    134 	if (argc < 2) {
    135 		fmt(stdin);
    136 		oflush();
    137 		exit(0);
    138 	}
    139 	while (--argc) {
    140 		if ((fi = fopen(*++argv, "r")) == NULL) {
    141 			perror(*argv);
    142 			errs++;
    143 			continue;
    144 		}
    145 		fmt(fi);
    146 		fclose(fi);
    147 	}
    148 	oflush();
    149 	exit(errs);
    150 }
    151 
    152 /*
    153  * Read up characters from the passed input file, forming lines,
    154  * doing ^H processing, expanding tabs, stripping trailing blanks,
    155  * and sending each line down for analysis.
    156  */
    157 static void
    158 fmt(FILE *fi)
    159 {
    160 	char linebuf[BUFSIZ], canonb[BUFSIZ];
    161 	char *cp, *cp2;
    162 	int c, col, add_space;
    163 
    164 	if (center) {
    165 		while (1) {
    166 			cp = fgets(linebuf, BUFSIZ, fi);
    167 			if (!cp)
    168 				return;
    169 			while (*cp && isspace(*cp))
    170 				cp++;
    171 			cp2 = cp + strlen(cp) - 1;
    172 			while (cp2 > cp && isspace(*cp2))
    173 				cp2--;
    174 			if (cp == cp2)
    175 				putchar('\n');
    176 			col = cp2 - cp;
    177 			for (c = 0; c < (goal_length-col)/2; c++)
    178 				putchar(' ');
    179 			while (cp <= cp2)
    180 				putchar(*cp++);
    181 			putchar('\n');
    182 		}
    183 	}
    184 	c = getc(fi);
    185 	while (c != EOF) {
    186 		/*
    187 		 * Collect a line, doing ^H processing.
    188 		 * Leave tabs for now.
    189 		 */
    190 		cp = linebuf;
    191 		while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
    192 			if (c == '\b') {
    193 				if (cp > linebuf)
    194 					cp--;
    195 				c = getc(fi);
    196 				continue;
    197 			}
    198 			if(!(isprint(c) || c == '\t' || c >= 160)) {
    199 				c = getc(fi);
    200 				continue;
    201 			}
    202 			*cp++ = c;
    203 			c = getc(fi);
    204 		}
    205 		*cp = '\0';
    206 
    207 		/*
    208 		 * By default, add space after the end of current input
    209 		 * (normally end of line)
    210 		 */
    211 		add_space = 1;
    212 
    213 		/*
    214 		 * If the input line is longer than linebuf buffer can hold,
    215 		 * process the data read so far as if it was a separate line -
    216 		 * if there is any whitespace character in the read data,
    217 		 * process all the data up to it, otherwise process all.
    218 		 */
    219 		if (c != '\n' && c != EOF && !isspace(c)) {
    220 			/*
    221 			 * Find out if any whitespace character has been read.
    222 			 */
    223 			for(cp2 = cp; cp2 >= linebuf
    224 				&& !isspace((unsigned char)*cp2); cp2--);
    225 
    226 			if (cp2 < linebuf) {
    227 				/*
    228 				 * ungetc() last read character so that it
    229 				 * won't get lost.
    230 				 */
    231 				ungetc(c, fi);
    232 				/*
    233 				 * Don't append space on the end in split().
    234 				 */
    235 				add_space = 0;
    236 			} else {
    237 				/*
    238 				 * To avoid splitting a word in a middle,
    239 				 * ungetc() all characters after last
    240 				 * whitespace char.
    241 				 */
    242 				while (!isspace(c) && (cp >= linebuf)) {
    243 					ungetc(c, fi);
    244 					c = *--cp;
    245 				}
    246 				*cp = '\0';
    247 			}
    248 		}
    249 
    250 		/*
    251 		 * Expand tabs on the way to canonb.
    252 		 */
    253 		col = 0;
    254 		cp = linebuf;
    255 		cp2 = canonb;
    256 		while ((c = *cp++) != 0) {
    257 			if (c != '\t') {
    258 				col++;
    259 				if (cp2-canonb < BUFSIZ-1)
    260 					*cp2++ = c;
    261 				continue;
    262 			}
    263 			do {
    264 				if (cp2-canonb < BUFSIZ-1)
    265 					*cp2++ = ' ';
    266 				col++;
    267 			} while ((col & 07) != 0);
    268 		}
    269 
    270 		/*
    271 		 * Swipe trailing blanks from the line.
    272 		 */
    273 		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
    274 			;
    275 		*++cp2 = '\0';
    276 		prefix(canonb, add_space);
    277 		if (c != EOF)
    278 			c = getc(fi);
    279 	}
    280 }
    281 
    282 /*
    283  * Take a line devoid of tabs and other garbage and determine its
    284  * blank prefix.  If the indent changes, call for a linebreak.
    285  * If the input line is blank, echo the blank line on the output.
    286  * Finally, if the line minus the prefix is a mail header, try to keep
    287  * it on a line by itself.
    288  */
    289 static void
    290 prefix(const char line[], int add_space)
    291 {
    292 	const char *cp;
    293 	char **hp;
    294 	int np, h;
    295 
    296 	if (strlen(line) == 0) {
    297 		oflush();
    298 		putchar('\n');
    299 		return;
    300 	}
    301 	for (cp = line; *cp == ' '; cp++)
    302 		;
    303 	np = cp - line;
    304 
    305 	/*
    306 	 * The following horrible expression attempts to avoid linebreaks
    307 	 * when the indent changes due to a paragraph.
    308 	 */
    309 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
    310 		oflush();
    311 	if ((h = ishead(cp)) != 0)
    312 		oflush(), mark = lineno;
    313 	if (lineno - mark < 3 && lineno - mark > 0)
    314 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
    315 			if (ispref(*hp, cp)) {
    316 				h = 1;
    317 				oflush();
    318 				break;
    319 			}
    320 	if (!h && (h = (*cp == '.')))
    321 		oflush();
    322 	pfx = np;
    323 	if (h) {
    324 		pack(cp, strlen(cp));
    325 		oflush();
    326 	} else
    327 		split(cp, add_space);
    328 	lineno++;
    329 }
    330 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * line must not start with a blank (prefix() strips the indent).  When
 * add_space is nonzero, the last word of the line gets a blank appended,
 * or two blanks if it ends in '.', ':' or '!'.
 *
 * The word buffer has room for the longest word a line of up to
 * BUFSIZ-1 characters can produce, including the appended blanks.  All
 * writes are bounded anyway: a longer word is handed on in pieces and
 * blanks that do not fit are dropped.
 */
static void
split(const char line[], int add_space)
{
	const char *cp;
	char *cp2;
	char word[BUFSIZ + 4];
	/*
	 * Non-blank characters are only stored below bodyend.  One step
	 * stores at most two characters, which leaves room for two blanks
	 * and the terminating NUL.
	 */
	char *const bodyend = word + sizeof(word) - 4;
	char *const wordend = word + sizeof(word) - 1;
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash and the white space it escapes are
		 * both copied but count as one character of word length.
		 */
		while (*cp && *cp != ' ' && cp2 < bodyend) {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0' && add_space) {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		while (*cp == ' ') {
			if (cp2 < wordend)
				*cp2++ = ' ';
			cp++;
		}
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 -- the word length, without the blanks
		 * attached to it, goes along to pack().
		 */
		pack(word, wordl);
	}
}
    379 
    380 /*
    381  * Output section.
    382  * Build up line images from the words passed in.  Prefix
    383  * each line with correct number of blanks.  The buffer "outbuf"
    384  * contains the current partial line image, including prefixed blanks.
    385  * "outp" points to the next available space therein.  When outp is NOSTR,
    386  * there ain't nothing in there yet.  At the bottom of this whole mess,
    387  * leading tabs are reinserted.
    388  */
char	outbuf[BUFSIZ];		/* Image of the output line being built */
char	*outp;			/* Next free byte in outbuf; null: no line yet */

/*
 * Initialize the output section: mark the output line as not yet
 * started by making outp a null pointer (the value NOSTR stands for).
 */
static void
setout(void)
{
	outp = NULL;
}
    400 
    401 /*
    402  * Pack a word onto the output line.  If this is the beginning of
    403  * the line, push on the appropriately-sized string of blanks first.
    404  * If the word won't fit on the current line, flush and begin a new
    405  * line.  If the word is too long to fit all by itself on a line,
    406  * just give it its own and hope for the best.
    407  *
    408  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
    409  *	goal length, take it.  If not, then check to see if the line
    410  *	will be over the max length; if so put the word on the next
    411  *	line.  If not, check to see if the line will be closer to the
    412  *	goal length with or without the word and take it or put it on
    413  *	the next line accordingly.
    414  */
    415 
    416 /*
    417  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
    418  * pack(word)
    419  *	char word[];
    420  */
    421 static void
    422 pack(const char word[], int wl)
    423 {
    424 	const char *cp;
    425 	int s, t;
    426 
    427 	if (outp == NOSTR)
    428 		leadin();
    429 	/*
    430 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
    431 	 * length of the line before the word is added; t is now the length
    432 	 * of the line after the word is added
    433 	 *	t = strlen(word);
    434 	 *	if (t+s <= LENGTH)
    435 	 */
    436 	s = outp - outbuf;
    437 	t = wl + s;
    438 	if ((t <= goal_length) ||
    439 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
    440 		/*
    441 		 * In like flint!
    442 		 */
    443 		for (cp = word; *cp; *outp++ = *cp++);
    444 		return;
    445 	}
    446 	if (s > pfx) {
    447 		oflush();
    448 		leadin();
    449 	}
    450 	for (cp = word; *cp; *outp++ = *cp++);
    451 }
    452 
    453 /*
    454  * If there is anything on the current output line, send it on
    455  * its way.  Set outp to NOSTR to indicate the absence of the current
    456  * line prefix.
    457  */
    458 static void
    459 oflush(void)
    460 {
    461 	if (outp == NOSTR)
    462 		return;
    463 	*outp = '\0';
    464 	tabulate(outbuf);
    465 	outp = NOSTR;
    466 }
    467 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from line in place.  Every full group of
 * eight leading blanks is then written as one tab, the rest of the
 * leading blanks as blanks, followed by the remaining text and a
 * newline.  An empty or all-blank line produces just the newline.
 */
static void
tabulate(char line[])
{
	size_t len, blanks, i;

	/*
	 * Toss trailing blanks in the output line.  Working with a length
	 * rather than a pointer keeps this inside the array even when the
	 * line is empty.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	blanks = strspn(line, " ");
	for (i = blanks >> 3; i > 0; i--)
		putc('\t', stdout);
	for (i = blanks & 07; i > 0; i--)
		putc(' ', stdout);
	fputs(line + blanks, stdout);
	putc('\n', stdout);
}
    506 
    507 /*
    508  * Initialize the output line with the appropriate number of
    509  * leading blanks.
    510  */
    511 static void
    512 leadin(void)
    513 {
    514 	int b;
    515 	char *cp;
    516 
    517 	for (b = 0, cp = outbuf; b < pfx; b++)
    518 		*cp++ = ' ';
    519 	outp = cp;
    520 }
    521 
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, which makes the empty string a prefix of
 * anything, and 0 otherwise.  Neither string is read past its
 * terminating NUL: if s2 ends first, its NUL fails the comparison.
 */
static int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
    533