fmt.c revision 1.12 1 /* $NetBSD: fmt.c,v 1.12 2000/09/15 11:23:17 abs Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 The Regents of the University of California. All rights reserved.\n");
40 #endif /* not lint */
41
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
45 #endif
46 __RCSID("$NetBSD: fmt.c,v 1.12 2000/09/15 11:23:17 abs Exp $");
47 #endif /* not lint */
48
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <ctype.h>
53 #include <locale.h>
54
55 /*
56 * fmt -- format the concatenation of input files or standard input
57 * onto standard output. Designed for use with Mail ~|
58 *
 * Syntax : fmt [ -c ] [ goal [ max ] ] [ name ... ]
60 * Authors: Kurt Shoens (UCB) 12/7/78;
61 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
62 */
63
64 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
65 * #define LENGTH 72 Max line length in output
66 */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/* LIZ@UOM 6/18/85 -- New variables goal_length and max_length */
#define	GOAL_LENGTH	65	/* Default for goal_length, installed by main() */
#define	MAX_LENGTH	75	/* Default for max_length, installed by main() */
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */
int	center;			/* Nonzero when -c was given: fmt() centers lines */

/*
 * Header field names, terminated by a null pointer.  prefix() keeps a
 * line starting with one of these on a line of its own when it closely
 * follows a head line.
 */
char	*headnames[] = {"To", "Subject", "Cc", 0};
80
/* Functions private to this file. */
static void	fmt __P((FILE *));
static int	ispref __P((const char *, const char *));
static void	leadin __P((void));
static void	oflush __P((void));
static void	pack __P((const char *, int));
static void	prefix __P((const char *, int));
static void	setout __P((void));
static void	split __P((const char *, int));
static void	tabulate __P((char *));

/*
 * Functions with external linkage.  NOTE(review): no definition of
 * ishead() is visible in this part of the source; presumably it is
 * supplied by another file -- confirm.
 */
int		ishead __P((const char *));
int		main __P((int, char **));
93
94 /*
95 * Drive the whole formatter by managing input files. Also,
96 * cause initialization of the output stuff and flush it out
97 * at the end.
98 */
99
100 int
101 main(argc, argv)
102 int argc;
103 char **argv;
104 {
105 FILE *fi;
106 int errs = 0;
107 int number; /* LIZ@UOM 6/18/85 */
108
109 goal_length = GOAL_LENGTH;
110 max_length = MAX_LENGTH;
111 setout();
112 lineno = 1;
113 mark = -10;
114
115 setlocale(LC_ALL, "");
116
117 /*
118 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
119 */
120 if (argc > 1 && !strcmp(argv[1], "-c")) {
121 center++;
122 argc--;
123 argv++;
124 }
125 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
126 argv++;
127 argc--;
128 goal_length = abs(number);
129 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
130 argv++;
131 argc--;
132 max_length = abs(number);
133 }
134 }
135 if (max_length <= goal_length) {
136 fprintf(stderr, "Max length must be greater than %s\n",
137 "goal length");
138 exit(1);
139 }
140 if (argc < 2) {
141 fmt(stdin);
142 oflush();
143 exit(0);
144 }
145 while (--argc) {
146 if ((fi = fopen(*++argv, "r")) == NULL) {
147 perror(*argv);
148 errs++;
149 continue;
150 }
151 fmt(fi);
152 fclose(fi);
153 }
154 oflush();
155 exit(errs);
156 }
157
158 /*
159 * Read up characters from the passed input file, forming lines,
160 * doing ^H processing, expanding tabs, stripping trailing blanks,
161 * and sending each line down for analysis.
162 */
163 static void
164 fmt(fi)
165 FILE *fi;
166 {
167 char linebuf[BUFSIZ], canonb[BUFSIZ];
168 char *cp, *cp2;
169 int c, col, add_space;
170
171 if (center) {
172 while (1) {
173 cp = fgets(linebuf, BUFSIZ, fi);
174 if (!cp)
175 return;
176 while (*cp && isspace(*cp))
177 cp++;
178 cp2 = cp + strlen(cp) - 1;
179 while (cp2 > cp && isspace(*cp2))
180 cp2--;
181 if (cp == cp2)
182 putchar('\n');
183 col = cp2 - cp;
184 for (c = 0; c < (goal_length-col)/2; c++)
185 putchar(' ');
186 while (cp <= cp2)
187 putchar(*cp++);
188 putchar('\n');
189 }
190 }
191 c = getc(fi);
192 while (c != EOF) {
193 /*
194 * Collect a line, doing ^H processing.
195 * Leave tabs for now.
196 */
197 cp = linebuf;
198 while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
199 if (c == '\b') {
200 if (cp > linebuf)
201 cp--;
202 c = getc(fi);
203 continue;
204 }
205 if(!(isprint(c) || c == '\t')) {
206 c = getc(fi);
207 continue;
208 }
209 *cp++ = c;
210 c = getc(fi);
211 }
212 *cp = '\0';
213
214 /*
215 * By default, add space after the end of current input
216 * (normally end of line)
217 */
218 add_space = 1;
219
220 /*
221 * If the input line is longer than linebuf buffer can hold,
222 * process the data read so far as if it was a separate line -
223 * if there is any whitespace character in the read data,
224 * process all the data up to it, otherwise process all.
225 */
226 if (c != '\n' && c != EOF && !isspace(c)) {
227 /*
228 * Find out if any whitespace character has been read.
229 */
230 for(cp2 = cp; cp2 >= linebuf
231 && !isspace((unsigned char)*cp2); cp2--);
232
233 if (cp2 < linebuf) {
234 /*
235 * ungetc() last read character so that it
236 * won't get lost.
237 */
238 ungetc(c, fi);
239 /*
240 * Don't append space on the end in split().
241 */
242 add_space = 0;
243 } else {
244 /*
245 * To avoid splitting a word in a middle,
246 * ungetc() all characters after last
247 * whitespace char.
248 */
249 while (!isspace(c) && (cp >= linebuf)) {
250 ungetc(c, fi);
251 c = *--cp;
252 }
253 *cp = '\0';
254 }
255 }
256
257 /*
258 * Expand tabs on the way to canonb.
259 */
260 col = 0;
261 cp = linebuf;
262 cp2 = canonb;
263 while ((c = *cp++) != 0) {
264 if (c != '\t') {
265 col++;
266 if (cp2-canonb < BUFSIZ-1)
267 *cp2++ = c;
268 continue;
269 }
270 do {
271 if (cp2-canonb < BUFSIZ-1)
272 *cp2++ = ' ';
273 col++;
274 } while ((col & 07) != 0);
275 }
276
277 /*
278 * Swipe trailing blanks from the line.
279 */
280 for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
281 ;
282 *++cp2 = '\0';
283 prefix(canonb, add_space);
284 if (c != EOF)
285 c = getc(fi);
286 }
287 }
288
289 /*
290 * Take a line devoid of tabs and other garbage and determine its
291 * blank prefix. If the indent changes, call for a linebreak.
292 * If the input line is blank, echo the blank line on the output.
293 * Finally, if the line minus the prefix is a mail header, try to keep
294 * it on a line by itself.
295 */
296 static void
297 prefix(line, add_space)
298 const char line[];
299 int add_space;
300 {
301 const char *cp;
302 char **hp;
303 int np, h;
304
305 if (strlen(line) == 0) {
306 oflush();
307 putchar('\n');
308 return;
309 }
310 for (cp = line; *cp == ' '; cp++)
311 ;
312 np = cp - line;
313
314 /*
315 * The following horrible expression attempts to avoid linebreaks
316 * when the indent changes due to a paragraph.
317 */
318 if (np != pfx && (np > pfx || abs(pfx-np) > 8))
319 oflush();
320 if ((h = ishead(cp)) != 0)
321 oflush(), mark = lineno;
322 if (lineno - mark < 3 && lineno - mark > 0)
323 for (hp = &headnames[0]; *hp != (char *) 0; hp++)
324 if (ispref(*hp, cp)) {
325 h = 1;
326 oflush();
327 break;
328 }
329 if (!h && (h = (*cp == '.')))
330 oflush();
331 pfx = np;
332 if (h) {
333 pack(cp, strlen(cp));
334 oflush();
335 } else
336 split(cp, add_space);
337 lineno++;
338 }
339
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * "line" must not start with a blank.  When "add_space" is nonzero the
 * last word of the line gets one blank appended, or two when it ends in
 * one of ".:!".
 *
 * The word buffer has room for BUFSIZ characters of text, one extra
 * character of an escape pair, the two blanks added at end of line and
 * the terminating NUL.  Every copy is bounded, so a word that would not
 * fit is handed to pack() in pieces instead of overrunning the buffer.
 */
static void
split(line, add_space)
	const char line[];
	int add_space;
{
	const char *cp;
	char *cp2;
	char word[BUFSIZ + 4];
	char *const text_end = word + BUFSIZ;		/* limit for word text */
	char *const word_end = word + sizeof(word) - 1;	/* slot for the NUL */
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash plus the white space it escapes is
		 * copied as two characters but counted as one.
		 */
		while (*cp && *cp != ' ' && cp2 < text_end) {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  cp[-1] is valid here: reaching the
		 * NUL means at least one character was just consumed.
		 */
		if (*cp == '\0' && add_space) {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		/* Attach the blanks that follow; skip any that do not fit. */
		while (*cp == ' ') {
			if (cp2 < word_end)
				*cp2++ = *cp;
			cp++;
		}
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
390
/*
 * Output section.
 * Build up line images from the words passed in.  Prefix
 * each line with correct number of blanks.  The buffer "outbuf"
 * contains the current partial line image, including prefixed blanks.
 * "outp" points to the next available space therein.  When outp is NOSTR,
 * there is nothing in there yet.  At the bottom of this whole mess,
 * leading tabs are reinserted.
 */
char	outbuf[BUFSIZ];			/* Sandbagged output line image */
char	*outp;				/* Pointer in above, or NOSTR if no line is started */
402
/*
 * Initialize the output section: mark the output line as not yet
 * started, so that the next pack() begins it with leadin().
 */
static void
setout()
{
	outp = NOSTR;	/* NOSTR means "no partial line in outbuf" */
}
411
412 /*
413 * Pack a word onto the output line. If this is the beginning of
414 * the line, push on the appropriately-sized string of blanks first.
415 * If the word won't fit on the current line, flush and begin a new
416 * line. If the word is too long to fit all by itself on a line,
417 * just give it its own and hope for the best.
418 *
419 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
420 * goal length, take it. If not, then check to see if the line
421 * will be over the max length; if so put the word on the next
422 * line. If not, check to see if the line will be closer to the
423 * goal length with or without the word and take it or put it on
424 * the next line accordingly.
425 */
426
427 /*
428 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
429 * pack(word)
430 * char word[];
431 */
432 static void
433 pack(word,wl)
434 const char word[];
435 int wl;
436 {
437 const char *cp;
438 int s, t;
439
440 if (outp == NOSTR)
441 leadin();
442 /*
443 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
444 * length of the line before the word is added; t is now the length
445 * of the line after the word is added
446 * t = strlen(word);
447 * if (t+s <= LENGTH)
448 */
449 s = outp - outbuf;
450 t = wl + s;
451 if ((t <= goal_length) ||
452 ((t <= max_length) && (t - goal_length <= goal_length - s))) {
453 /*
454 * In like flint!
455 */
456 for (cp = word; *cp; *outp++ = *cp++);
457 return;
458 }
459 if (s > pfx) {
460 oflush();
461 leadin();
462 }
463 for (cp = word; *cp; *outp++ = *cp++);
464 }
465
466 /*
467 * If there is anything on the current output line, send it on
468 * its way. Set outp to NOSTR to indicate the absence of the current
469 * line prefix.
470 */
471 static void
472 oflush()
473 {
474 if (outp == NOSTR)
475 return;
476 *outp = '\0';
477 tabulate(outbuf);
478 outp = NOSTR;
479 }
480
/*
 * Write one finished line image to standard output.
 *
 * Trailing blanks are removed from "line" in place.  The run of leading
 * blanks is then replaced by one tab for every eight blanks followed by
 * the remaining blanks, the rest of the line is copied out unchanged
 * and a newline is appended.
 */
static void
tabulate(line)
	char line[];
{
	size_t len;
	char *text;
	int blanks, ntabs, nspaces;

	/* Cut the line off after its last non-blank character. */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/* Measure the leading blanks and convert them to tabs + blanks. */
	text = line;
	while (*text == ' ')
		text++;
	blanks = text - line;
	ntabs = blanks / 8;
	nspaces = blanks % 8;

	while (ntabs-- > 0)
		putc('\t', stdout);
	while (nspaces-- > 0)
		putc(' ', stdout);
	for (; *text != '\0'; text++)
		putc(*text, stdout);
	putc('\n', stdout);
}
520
521 /*
522 * Initialize the output line with the appropriate number of
523 * leading blanks.
524 */
525 static void
526 leadin()
527 {
528 int b;
529 char *cp;
530
531 for (b = 0, cp = outbuf; b < pfx; b++)
532 *cp++ = ' ';
533 outp = cp;
534 }
535
/*
 * Is s1 a prefix of s2?
 *
 * Returns nonzero when every character of s1 matches the character at
 * the same position in s2 (so the empty string is a prefix of anything),
 * zero otherwise.  Both pointers advance together, and the scan stops at
 * the end of s1 or at the first mismatch, so neither string is read past
 * its terminating NUL.
 */
static int
ispref(s1, s2)
	const char *s1, *s2;
{

	while (*s1 != '\0' && *s1 == *s2) {
		s1++;
		s2++;
	}
	return (*s1 == '\0');
}
548