Home | History | Annotate | Line # | Download | only in fmt
fmt.c revision 1.24
      1 /*	$NetBSD: fmt.c,v 1.24 2006/01/15 14:26:10 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #ifndef lint
     34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
     35 	The Regents of the University of California.  All rights reserved.\n");
     36 #endif /* not lint */
     37 
     38 #ifndef lint
     39 #if 0
     40 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
     41 #endif
     42 __RCSID("$NetBSD: fmt.c,v 1.24 2006/01/15 14:26:10 christos Exp $");
     43 #endif /* not lint */
     44 
     45 #include <ctype.h>
     46 #include <locale.h>
     47 #include <stdio.h>
     48 #include <stdlib.h>
     49 #include <string.h>
     50 #include "buffer.h"
     51 
     52 /*
     53  * fmt -- format the concatenation of input files or standard input
     54  * onto standard output.  Designed for use with Mail ~|
     55  *
     56  * Syntax : fmt [ goal [ max ] ] [ name ... ]
     57  * Authors: Kurt Shoens (UCB) 12/7/78;
     58  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
     59  */
     60 
     61 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
     62 #define GOAL_LENGTH 65
     63 #define MAX_LENGTH 75
     64 static size_t	goal_length;	/* Target or goal line length in output */
     65 static size_t	max_length;	/* Max line length in output */
     66 static size_t	pfx;		/* Current leading blank count */
     67 static int	lineno;		/* Current input line */
     68 static int	mark;		/* Last place we saw a head line */
     69 static int	center;
     70 static struct buffer outbuf;
     71 
     72 static const char	*headnames[] = {"To", "Subject", "Cc", 0};
     73 
     74 static void	fmt(FILE *);
     75 static int	ispref(const char *, const char *);
     76 static void	leadin(void);
     77 static void	oflush(void);
     78 static void	pack(const char *, size_t);
     79 static void	prefix(const struct buffer *, int);
     80 static void	split(const char *, int);
     81 static void	tabulate(struct buffer *);
     82 
     83 
     84 int		ishead(const char *);
     85 
     86 /*
     87  * Drive the whole formatter by managing input files.  Also,
     88  * cause initialization of the output stuff and flush it out
     89  * at the end.
     90  */
     91 
     92 int
     93 main(int argc, char **argv)
     94 {
     95 	FILE *fi;
     96 	int errs = 0;
     97 	int number;		/* LIZ@UOM 6/18/85 */
     98 
     99 	goal_length = GOAL_LENGTH;
    100 	max_length = MAX_LENGTH;
    101 	buf_init(&outbuf);
    102 	lineno = 1;
    103 	mark = -10;
    104 
    105 	setprogname(*argv);
    106 	(void)setlocale(LC_ALL, "");
    107 
    108 	/*
    109 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
    110 	 */
    111 	if (argc > 1 && !strcmp(argv[1], "-C")) {
    112 		center++;
    113 		argc--;
    114 		argv++;
    115 	}
    116 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
    117 		argv++;
    118 		argc--;
    119 		goal_length = abs(number);
    120 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
    121 			argv++;
    122 			argc--;
    123 			max_length = abs(number);
    124 		}
    125 	}
    126 	if (max_length <= goal_length) {
    127 		errx(1, "Max length (%zu) must be greater than goal "
    128 		    "length (%zu)", max_length, goal_length);
    129 	}
    130 	if (argc < 2) {
    131 		fmt(stdin);
    132 		oflush();
    133 		return 0;
    134 	}
    135 	while (--argc) {
    136 		if ((fi = fopen(*++argv, "r")) == NULL) {
    137 			warn("Cannot open `%s'", *argv);
    138 			errs++;
    139 			continue;
    140 		}
    141 		fmt(fi);
    142 		(void)fclose(fi);
    143 	}
    144 	oflush();
    145 	buf_end(&outbuf);
    146 	return errs;
    147 }
    148 
    149 /*
    150  * Read up characters from the passed input file, forming lines,
    151  * doing ^H processing, expanding tabs, stripping trailing blanks,
    152  * and sending each line down for analysis.
    153  */
    154 static void
    155 fmt(FILE *fi)
    156 {
    157 	struct buffer lbuf, cbuf;
    158 	char *cp, *cp2;
    159 	int c, add_space;
    160 	size_t len, col;
    161 
    162 	if (center) {
    163 		for (;;) {
    164 			cp = fgetln(fi, &len);
    165 			if (!cp)
    166 				return;
    167 			cp2 = cp + len - 1;
    168 			while (len-- && isspace((unsigned char)*cp))
    169 				cp++;
    170 			while (cp2 > cp && isspace((unsigned char)*cp2))
    171 				cp2--;
    172 			if (cp == cp2)
    173 				(void)putchar('\n');
    174 			col = cp2 - cp;
    175 			if (goal_length > col)
    176 				for (c = 0; c < (goal_length - col) / 2; c++)
    177 					(void)putchar(' ');
    178 			while (cp <= cp2)
    179 				(void)putchar(*cp++);
    180 			(void)putchar('\n');
    181 		}
    182 	}
    183 
    184 	buf_init(&lbuf);
    185 	buf_init(&cbuf);
    186 	c = getc(fi);
    187 
    188 	while (c != EOF) {
    189 		/*
    190 		 * Collect a line, doing ^H processing.
    191 		 * Leave tabs for now.
    192 		 */
    193 		buf_reset(&lbuf);
    194 		while (c != '\n' && c != EOF) {
    195 			if (c == '\b') {
    196 				(void)buf_unputc(&lbuf);
    197 				c = getc(fi);
    198 				continue;
    199 			}
    200 			if(!(isprint(c) || c == '\t' || c >= 160)) {
    201 				c = getc(fi);
    202 				continue;
    203 			}
    204 			buf_putc(&lbuf, c);
    205 			c = getc(fi);
    206 		}
    207 		buf_putc(&lbuf, '\0');
    208 		(void)buf_unputc(&lbuf);
    209 		add_space = c != EOF;
    210 
    211 		/*
    212 		 * Expand tabs on the way.
    213 		 */
    214 		col = 0;
    215 		cp = lbuf.bptr;
    216 		buf_reset(&cbuf);
    217 		while ((c = *cp++) != '\0') {
    218 			if (c != '\t') {
    219 				col++;
    220 				buf_putc(&cbuf, c);
    221 				continue;
    222 			}
    223 			do {
    224 				buf_putc(&cbuf, ' ');
    225 				col++;
    226 			} while ((col & 07) != 0);
    227 		}
    228 
    229 		/*
    230 		 * Swipe trailing blanks from the line.
    231 		 */
    232 		for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
    233 			continue;
    234 		cbuf.ptr = cp2 + 1;
    235 		buf_putc(&cbuf, '\0');
    236 		(void)buf_unputc(&cbuf);
    237 		prefix(&cbuf, add_space);
    238 		if (c != EOF)
    239 			c = getc(fi);
    240 	}
    241 	buf_end(&cbuf);
    242 	buf_end(&lbuf);
    243 }
    244 
    245 /*
    246  * Take a line devoid of tabs and other garbage and determine its
    247  * blank prefix.  If the indent changes, call for a linebreak.
    248  * If the input line is blank, echo the blank line on the output.
    249  * Finally, if the line minus the prefix is a mail header, try to keep
    250  * it on a line by itself.
    251  */
    252 static void
    253 prefix(const struct buffer *buf, int add_space)
    254 {
    255 	const char *cp;
    256 	const char **hp;
    257 	size_t np;
    258 	int h;
    259 
    260 	if (buf->ptr == buf->bptr) {
    261 		oflush();
    262 		(void)putchar('\n');
    263 		return;
    264 	}
    265 	for (cp = buf->bptr; *cp == ' '; cp++)
    266 		continue;
    267 	np = cp - buf->bptr;
    268 
    269 	/*
    270 	 * The following horrible expression attempts to avoid linebreaks
    271 	 * when the indent changes due to a paragraph.
    272 	 */
    273 	if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
    274 		oflush();
    275 	if ((h = ishead(cp)) != 0) {
    276 		oflush();
    277 		mark = lineno;
    278 	}
    279 	if (lineno - mark < 3 && lineno - mark > 0)
    280 		for (hp = &headnames[0]; *hp != NULL; hp++)
    281 			if (ispref(*hp, cp)) {
    282 				h = 1;
    283 				oflush();
    284 				break;
    285 			}
    286 	if (!h && (h = (*cp == '.')))
    287 		oflush();
    288 	pfx = np;
    289 	if (h) {
    290 		pack(cp, (size_t)(buf->ptr - cp));
    291 		oflush();
    292 	} else
    293 		split(cp, add_space);
    294 	lineno++;
    295 }
    296 
    297 /*
    298  * Split up the passed line into output "words" which are
    299  * maximal strings of non-blanks with the blank separation
    300  * attached at the end.  Pass these words along to the output
    301  * line packer.
    302  */
    303 static void
    304 split(const char line[], int add_space)
    305 {
    306 	const char *cp;
    307 	struct buffer word;
    308 	size_t wlen;
    309 
    310 	buf_init(&word);
    311 	cp = line;
    312 	while (*cp) {
    313 		buf_reset(&word);
    314 		wlen = 0;
    315 
    316 		/*
    317 		 * Collect a 'word,' allowing it to contain escaped white
    318 		 * space.
    319 		 */
    320 		while (*cp && *cp != ' ') {
    321 			if (*cp == '\\' && isspace((unsigned char)cp[1]))
    322 				buf_putc(&word, *cp++);
    323 			buf_putc(&word, *cp++);
    324 			wlen++;
    325 		}
    326 
    327 		/*
    328 		 * Guarantee a space at end of line. Two spaces after end of
    329 		 * sentence punctuation.
    330 		 */
    331 		if (*cp == '\0' && add_space) {
    332 			buf_putc(&word, ' ');
    333 			if (strchr(".:!", cp[-1]))
    334 				buf_putc(&word, ' ');
    335 		}
    336 		while (*cp == ' ')
    337 			buf_putc(&word, *cp++);
    338 
    339 		buf_putc(&word, '\0');
    340 		(void)buf_unputc(&word);
    341 
    342 		pack(word.bptr, wlen);
    343 	}
    344 	buf_end(&word);
    345 }
    346 
    347 /*
    348  * Output section.
    349  * Build up line images from the words passed in.  Prefix
    350  * each line with correct number of blanks.
    351  *
    352  * At the bottom of this whole mess, leading tabs are reinserted.
    353  */
    354 
    355 /*
    356  * Pack a word onto the output line.  If this is the beginning of
    357  * the line, push on the appropriately-sized string of blanks first.
    358  * If the word won't fit on the current line, flush and begin a new
    359  * line.  If the word is too long to fit all by itself on a line,
    360  * just give it its own and hope for the best.
    361  *
    362  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
    363  *	goal length, take it.  If not, then check to see if the line
    364  *	will be over the max length; if so put the word on the next
    365  *	line.  If not, check to see if the line will be closer to the
    366  *	goal length with or without the word and take it or put it on
    367  *	the next line accordingly.
    368  */
    369 
    370 static void
    371 pack(const char *word, size_t wlen)
    372 {
    373 	const char *cp;
    374 	size_t s, t;
    375 
    376 	if (outbuf.bptr == outbuf.ptr)
    377 		leadin();
    378 	/*
    379 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
    380 	 * length of the line before the word is added; t is now the length
    381 	 * of the line after the word is added
    382 	 */
    383 	s = outbuf.ptr - outbuf.bptr;
    384 	t = wlen + s;
    385 	if ((t <= goal_length) || ((t <= max_length) &&
    386 	    (s <= goal_length) && (t - goal_length <= goal_length - s))) {
    387 		/*
    388 		 * In like flint!
    389 		 */
    390 		for (cp = word; *cp;)
    391 			buf_putc(&outbuf, *cp++);
    392 		return;
    393 	}
    394 	if (s > pfx) {
    395 		oflush();
    396 		leadin();
    397 	}
    398 	for (cp = word; *cp;)
    399 		buf_putc(&outbuf, *cp++);
    400 }
    401 
    402 /*
    403  * If there is anything on the current output line, send it on
    404  * its way.  Reset outbuf.
    405  */
    406 static void
    407 oflush(void)
    408 {
    409 	if (outbuf.bptr == outbuf.ptr)
    410 		return;
    411 	buf_putc(&outbuf, '\0');
    412 	(void)buf_unputc(&outbuf);
    413 	tabulate(&outbuf);
    414 	buf_reset(&outbuf);
    415 }
    416 
    417 /*
    418  * Take the passed line buffer, insert leading tabs where possible, and
    419  * output on standard output (finally).
    420  */
    421 static void
    422 tabulate(struct buffer *buf)
    423 {
    424 	char *cp;
    425 	size_t b, t;
    426 
    427 	/*
    428 	 * Toss trailing blanks in the output line.
    429 	 */
    430 	for (cp = buf->ptr; cp >= buf->bptr && *cp == ' '; cp--)
    431 		continue;
    432 	*cp = '\0';
    433 
    434 	/*
    435 	 * Count the leading blank space and tabulate.
    436 	 */
    437 	for (cp = buf->bptr; *cp == ' '; cp++)
    438 		continue;
    439 	b = cp - buf->bptr;
    440 	t = b / 8;
    441 	b = b % 8;
    442 	if (t > 0)
    443 		do
    444 			(void)putchar('\t');
    445 		while (--t);
    446 	if (b > 0)
    447 		do
    448 			(void)putchar(' ');
    449 		while (--b);
    450 	while (*cp)
    451 		(void)putchar(*cp++);
    452 	(void)putchar('\n');
    453 }
    454 
    455 /*
    456  * Initialize the output line with the appropriate number of
    457  * leading blanks.
    458  */
    459 static void
    460 leadin(void)
    461 {
    462 	size_t b;
    463 
    464 	buf_reset(&outbuf);
    465 
    466 	for (b = 0; b < pfx; b++)
    467 		buf_putc(&outbuf, ' ');
    468 }
    469 
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, 0 otherwise.  The empty string is a prefix of
 * any string.  Neither string is read beyond its terminating NUL: if
 * s2 is shorter than s1 its NUL fails the comparison.
 */
static int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1 != *s2)
			return 0;
		s1++;
		s2++;
	}
	return 1;
}
    481