fmt.c revision 1.16 1 /* $NetBSD: fmt.c,v 1.16 2002/03/02 13:55:13 wiz Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 The Regents of the University of California. All rights reserved.\n");
40 #endif /* not lint */
41
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
45 #endif
46 __RCSID("$NetBSD: fmt.c,v 1.16 2002/03/02 13:55:13 wiz Exp $");
47 #endif /* not lint */
48
49 #include <ctype.h>
50 #include <locale.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54
55 /*
56 * fmt -- format the concatenation of input files or standard input
57 * onto standard output. Designed for use with Mail ~|
58 *
59 * Syntax : fmt [ -C ] [ goal [ max ] ] [ name ... ]
60 * Authors: Kurt Shoens (UCB) 12/7/78;
61 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
62 */
63
64 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
65 * #define LENGTH 72 Max line length in output
66 */
67 #define NOSTR ((char *) 0) /* Null string pointer for lint */
68
69 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
70 #define GOAL_LENGTH 65 /* Default for goal_length, set in main() */
71 #define MAX_LENGTH 75 /* Default for max_length, set in main() */
72 int goal_length; /* Target or goal line length in output */
73 int max_length; /* Max line length in output */
74 int pfx; /* Current leading blank count */
75 int lineno; /* Current input line */
76 int mark; /* Last place we saw a head line */
77 int center; /* Nonzero when "-C" was given: fmt() centers lines instead of filling */
78
/* Null-terminated list of names that prefix() tests with ispref() on lines closely following a head line */
79 char *headnames[] = {"To", "Subject", "Cc", 0};
80
/* Helpers private to this file. */
81 static void fmt(FILE *);
82 static int ispref(const char *, const char *);
83 static void leadin(void);
84 static void oflush(void);
85 static void pack(const char *, int);
86 static void prefix(const char *, int);
87 static void setout(void);
88 static void split(const char *, int);
89 static void tabulate(char *);
90
/* ishead() is declared here but not defined in this part of the file. */
91 int ishead(const char *);
92 int main(int, char **);
93
94 /*
95 * Drive the whole formatter by managing input files. Also,
96 * cause initialization of the output stuff and flush it out
97 * at the end.
98 */
99
100 int
101 main(int argc, char **argv)
102 {
103 FILE *fi;
104 int errs = 0;
105 int number; /* LIZ@UOM 6/18/85 */
106
107 goal_length = GOAL_LENGTH;
108 max_length = MAX_LENGTH;
109 setout();
110 lineno = 1;
111 mark = -10;
112
113 setlocale(LC_ALL, "");
114
115 /*
116 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
117 */
118 if (argc > 1 && !strcmp(argv[1], "-C")) {
119 center++;
120 argc--;
121 argv++;
122 }
123 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
124 argv++;
125 argc--;
126 goal_length = abs(number);
127 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
128 argv++;
129 argc--;
130 max_length = abs(number);
131 }
132 }
133 if (max_length <= goal_length) {
134 fprintf(stderr, "Max length must be greater than %s\n",
135 "goal length");
136 exit(1);
137 }
138 if (argc < 2) {
139 fmt(stdin);
140 oflush();
141 exit(0);
142 }
143 while (--argc) {
144 if ((fi = fopen(*++argv, "r")) == NULL) {
145 perror(*argv);
146 errs++;
147 continue;
148 }
149 fmt(fi);
150 fclose(fi);
151 }
152 oflush();
153 exit(errs);
154 }
155
156 /*
157 * Read up characters from the passed input file, forming lines,
158 * doing ^H processing, expanding tabs, stripping trailing blanks,
159 * and sending each line down for analysis.
160 */
161 static void
162 fmt(FILE *fi)
163 {
164 char linebuf[BUFSIZ], canonb[BUFSIZ];
165 char *cp, *cp2;
166 int c, col, add_space;
167
168 if (center) {
169 while (1) {
170 cp = fgets(linebuf, BUFSIZ, fi);
171 if (!cp)
172 return;
173 while (*cp && isspace(*cp))
174 cp++;
175 cp2 = cp + strlen(cp) - 1;
176 while (cp2 > cp && isspace(*cp2))
177 cp2--;
178 if (cp == cp2)
179 putchar('\n');
180 col = cp2 - cp;
181 for (c = 0; c < (goal_length-col)/2; c++)
182 putchar(' ');
183 while (cp <= cp2)
184 putchar(*cp++);
185 putchar('\n');
186 }
187 }
188 c = getc(fi);
189 while (c != EOF) {
190 /*
191 * Collect a line, doing ^H processing.
192 * Leave tabs for now.
193 */
194 cp = linebuf;
195 while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
196 if (c == '\b') {
197 if (cp > linebuf)
198 cp--;
199 c = getc(fi);
200 continue;
201 }
202 if(!(isprint(c) || c == '\t' || c >= 160)) {
203 c = getc(fi);
204 continue;
205 }
206 *cp++ = c;
207 c = getc(fi);
208 }
209 *cp = '\0';
210
211 /*
212 * By default, add space after the end of current input
213 * (normally end of line)
214 */
215 add_space = 1;
216
217 /*
218 * If the input line is longer than linebuf buffer can hold,
219 * process the data read so far as if it was a separate line -
220 * if there is any whitespace character in the read data,
221 * process all the data up to it, otherwise process all.
222 */
223 if (c != '\n' && c != EOF && !isspace(c)) {
224 /*
225 * Find out if any whitespace character has been read.
226 */
227 for(cp2 = cp; cp2 >= linebuf
228 && !isspace((unsigned char)*cp2); cp2--);
229
230 if (cp2 < linebuf) {
231 /*
232 * ungetc() last read character so that it
233 * won't get lost.
234 */
235 ungetc(c, fi);
236 /*
237 * Don't append space on the end in split().
238 */
239 add_space = 0;
240 } else {
241 /*
242 * To avoid splitting a word in a middle,
243 * ungetc() all characters after last
244 * whitespace char.
245 */
246 while (!isspace(c) && (cp >= linebuf)) {
247 ungetc(c, fi);
248 c = *--cp;
249 }
250 *cp = '\0';
251 }
252 }
253
254 /*
255 * Expand tabs on the way to canonb.
256 */
257 col = 0;
258 cp = linebuf;
259 cp2 = canonb;
260 while ((c = *cp++) != 0) {
261 if (c != '\t') {
262 col++;
263 if (cp2-canonb < BUFSIZ-1)
264 *cp2++ = c;
265 continue;
266 }
267 do {
268 if (cp2-canonb < BUFSIZ-1)
269 *cp2++ = ' ';
270 col++;
271 } while ((col & 07) != 0);
272 }
273
274 /*
275 * Swipe trailing blanks from the line.
276 */
277 for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
278 ;
279 *++cp2 = '\0';
280 prefix(canonb, add_space);
281 if (c != EOF)
282 c = getc(fi);
283 }
284 }
285
286 /*
287 * Take a line devoid of tabs and other garbage and determine its
288 * blank prefix. If the indent changes, call for a linebreak.
289 * If the input line is blank, echo the blank line on the output.
290 * Finally, if the line minus the prefix is a mail header, try to keep
291 * it on a line by itself.
292 */
293 static void
294 prefix(const char line[], int add_space)
295 {
296 const char *cp;
297 char **hp;
298 int np, h;
299
300 if (strlen(line) == 0) {
301 oflush();
302 putchar('\n');
303 return;
304 }
305 for (cp = line; *cp == ' '; cp++)
306 ;
307 np = cp - line;
308
309 /*
310 * The following horrible expression attempts to avoid linebreaks
311 * when the indent changes due to a paragraph.
312 */
313 if (np != pfx && (np > pfx || abs(pfx-np) > 8))
314 oflush();
315 if ((h = ishead(cp)) != 0)
316 oflush(), mark = lineno;
317 if (lineno - mark < 3 && lineno - mark > 0)
318 for (hp = &headnames[0]; *hp != (char *) 0; hp++)
319 if (ispref(*hp, cp)) {
320 h = 1;
321 oflush();
322 break;
323 }
324 if (!h && (h = (*cp == '.')))
325 oflush();
326 pfx = np;
327 if (h) {
328 pack(cp, strlen(cp));
329 oflush();
330 } else
331 split(cp, add_space);
332 lineno++;
333 }
334
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * line      - text with no leading blanks and no tabs
 * add_space - when nonzero, the last word of the line gets one blank
 *             appended (two after '.', ':' or '!')
 *
 * The length handed to pack() counts the word without its blanks; a
 * backslash followed by whitespace is copied as two characters but
 * counted as one.
 */
static void
split(const char line[], int add_space)
{
	const char *cp;
	char *cp2;
	/*
	 * A word of BUFSIZ - 1 characters followed by two blanks and the
	 * terminating NUL needs BUFSIZ + 2 bytes.
	 */
	char word[BUFSIZ + 2];
	char *const textend = word + BUFSIZ - 1;	/* word text stops here */
	char *const wordend = word + sizeof(word) - 1;	/* slot for the NUL */
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  Text that does not fit is left for the next pass
		 * round the outer loop instead of overrunning the buffer.
		 */
		while (*cp && *cp != ' ' && cp2 < textend) {
			if (*cp == '\\' && isspace((unsigned char)cp[1])) {
				/* The escape pair needs two slots. */
				if (textend - cp2 < 2)
					break;
				*cp2++ = *cp++;
			}
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line.  Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0' && add_space) {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		/* Attach the blanks that follow; excess ones are dropped. */
		while (*cp == ' ') {
			if (cp2 < wordend)
				*cp2++ = ' ';
			cp++;
		}
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
383
384 /*
385 * Output section.
386 * Build up line images from the words passed in. Prefix
387 * each line with correct number of blanks. The buffer "outbuf"
388 * contains the current partial line image, including prefixed blanks.
389 * "outp" points to the next available space therein. When outp is NOSTR,
390 * there ain't nothing in there yet. At the bottom of this whole mess,
391 * leading tabs are reinserted.
392 */
393 char outbuf[BUFSIZ]; /* Sandbagged output line image */
394 char *outp; /* Pointer in above */
395
396 /*
397 * Initialize the output section.
398 */
399 static void
400 setout(void)
401 {
402 outp = NOSTR;
403 }
404
405 /*
406 * Pack a word onto the output line. If this is the beginning of
407 * the line, push on the appropriately-sized string of blanks first.
408 * If the word won't fit on the current line, flush and begin a new
409 * line. If the word is too long to fit all by itself on a line,
410 * just give it its own and hope for the best.
411 *
412 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
413 * goal length, take it. If not, then check to see if the line
414 * will be over the max length; if so put the word on the next
415 * line. If not, check to see if the line will be closer to the
416 * goal length with or without the word and take it or put it on
417 * the next line accordingly.
418 */
419
420 /*
421 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
422 * pack(word)
423 * char word[];
424 */
425 static void
426 pack(const char word[], int wl)
427 {
428 const char *cp;
429 int s, t;
430
431 if (outp == NOSTR)
432 leadin();
433 /*
434 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
435 * length of the line before the word is added; t is now the length
436 * of the line after the word is added
437 * t = strlen(word);
438 * if (t+s <= LENGTH)
439 */
440 s = outp - outbuf;
441 t = wl + s;
442 if ((t <= goal_length) ||
443 ((t <= max_length) && (t - goal_length <= goal_length - s))) {
444 /*
445 * In like flint!
446 */
447 for (cp = word; *cp; *outp++ = *cp++);
448 return;
449 }
450 if (s > pfx) {
451 oflush();
452 leadin();
453 }
454 for (cp = word; *cp; *outp++ = *cp++);
455 }
456
457 /*
458 * If there is anything on the current output line, send it on
459 * its way. Set outp to NOSTR to indicate the absence of the current
460 * line prefix.
461 */
462 static void
463 oflush(void)
464 {
465 if (outp == NOSTR)
466 return;
467 *outp = '\0';
468 tabulate(outbuf);
469 outp = NOSTR;
470 }
471
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from "line" in place.  Every full group
 * of eight leading blanks is written as one tab, the remaining leading
 * blanks and the rest of the text are written as they are, and a
 * newline is added.
 */
static void
tabulate(char line[])
{
	const char *cp;
	size_t len, lead, i;

	/*
	 * Toss trailing blanks in the output line.  Working with a length
	 * keeps an empty or all-blank line from producing a pointer in
	 * front of the buffer.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (lead = 0; line[lead] == ' '; lead++)
		;
	for (i = lead >> 3; i > 0; i--)
		putc('\t', stdout);
	for (i = lead & 07; i > 0; i--)
		putc(' ', stdout);
	for (cp = line + lead; *cp != '\0'; cp++)
		putc(*cp, stdout);
	putc('\n', stdout);
}
510
511 /*
512 * Initialize the output line with the appropriate number of
513 * leading blanks.
514 */
515 static void
516 leadin(void)
517 {
518 int b;
519 char *cp;
520
521 for (b = 0, cp = outbuf; b < pfx; b++)
522 *cp++ = ' ';
523 outp = cp;
524 }
525
/*
 * Is s1 a prefix of s2??
 *
 * Returns nonzero when every character of s1 matches the character at
 * the same position in s2, zero otherwise.  An empty s1 is a prefix of
 * any string.
 */
static int
ispref(const char *s1, const char *s2)
{

	/* strncmp() stops at s2's terminator, so a short s2 is safe. */
	return (strncmp(s1, s2, strlen(s1)) == 0);
}
537