/*	$NetBSD: fmt.c,v 1.17 2003/08/07 11:13:47 agc Exp $	*/
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 The Regents of the University of California. All rights reserved.\n");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
41 #endif
42 __RCSID("$NetBSD: fmt.c,v 1.17 2003/08/07 11:13:47 agc Exp $");
43 #endif /* not lint */
44
45 #include <ctype.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50
51 /*
52 * fmt -- format the concatenation of input files or standard input
53 * onto standard output. Designed for use with Mail ~|
54 *
55 * Syntax : fmt [ goal [ max ] ] [ name ... ]
56 * Authors: Kurt Shoens (UCB) 12/7/78;
57 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
58 */
59
/*
 * LIZ@UOM 6/18/85 -- The fixed LENGTH limit (formerly 72) is gone;
 * goal_length and max_length below replace it.
 */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/* LIZ@UOM 6/18/85 -- Defaults for the two line-length settings. */
#define	GOAL_LENGTH	65
#define	MAX_LENGTH	75

int	goal_length;		/* Length we try to make output lines */
int	max_length;		/* Length output lines should not exceed */
int	pfx;			/* Leading blank count of the current line */
int	lineno;			/* Number of the current input line */
int	mark;			/* Input line where a head line was last seen */
int	center;			/* Nonzero when -C was given: center each line */

/* Header names recognized just after a head line; 0-terminated list. */
char	*headnames[] = {"To", "Subject", "Cc", 0};
76
77 static void fmt(FILE *);
78 static int ispref(const char *, const char *);
79 static void leadin(void);
80 static void oflush(void);
81 static void pack(const char *, int);
82 static void prefix(const char *, int);
83 static void setout(void);
84 static void split(const char *, int);
85 static void tabulate(char *);
86
87 int ishead(const char *);
88 int main(int, char **);
89
90 /*
91 * Drive the whole formatter by managing input files. Also,
92 * cause initialization of the output stuff and flush it out
93 * at the end.
94 */
95
96 int
97 main(int argc, char **argv)
98 {
99 FILE *fi;
100 int errs = 0;
101 int number; /* LIZ@UOM 6/18/85 */
102
103 goal_length = GOAL_LENGTH;
104 max_length = MAX_LENGTH;
105 setout();
106 lineno = 1;
107 mark = -10;
108
109 setlocale(LC_ALL, "");
110
111 /*
112 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
113 */
114 if (argc > 1 && !strcmp(argv[1], "-C")) {
115 center++;
116 argc--;
117 argv++;
118 }
119 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
120 argv++;
121 argc--;
122 goal_length = abs(number);
123 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
124 argv++;
125 argc--;
126 max_length = abs(number);
127 }
128 }
129 if (max_length <= goal_length) {
130 fprintf(stderr, "Max length must be greater than %s\n",
131 "goal length");
132 exit(1);
133 }
134 if (argc < 2) {
135 fmt(stdin);
136 oflush();
137 exit(0);
138 }
139 while (--argc) {
140 if ((fi = fopen(*++argv, "r")) == NULL) {
141 perror(*argv);
142 errs++;
143 continue;
144 }
145 fmt(fi);
146 fclose(fi);
147 }
148 oflush();
149 exit(errs);
150 }
151
152 /*
153 * Read up characters from the passed input file, forming lines,
154 * doing ^H processing, expanding tabs, stripping trailing blanks,
155 * and sending each line down for analysis.
156 */
157 static void
158 fmt(FILE *fi)
159 {
160 char linebuf[BUFSIZ], canonb[BUFSIZ];
161 char *cp, *cp2;
162 int c, col, add_space;
163
164 if (center) {
165 while (1) {
166 cp = fgets(linebuf, BUFSIZ, fi);
167 if (!cp)
168 return;
169 while (*cp && isspace(*cp))
170 cp++;
171 cp2 = cp + strlen(cp) - 1;
172 while (cp2 > cp && isspace(*cp2))
173 cp2--;
174 if (cp == cp2)
175 putchar('\n');
176 col = cp2 - cp;
177 for (c = 0; c < (goal_length-col)/2; c++)
178 putchar(' ');
179 while (cp <= cp2)
180 putchar(*cp++);
181 putchar('\n');
182 }
183 }
184 c = getc(fi);
185 while (c != EOF) {
186 /*
187 * Collect a line, doing ^H processing.
188 * Leave tabs for now.
189 */
190 cp = linebuf;
191 while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
192 if (c == '\b') {
193 if (cp > linebuf)
194 cp--;
195 c = getc(fi);
196 continue;
197 }
198 if(!(isprint(c) || c == '\t' || c >= 160)) {
199 c = getc(fi);
200 continue;
201 }
202 *cp++ = c;
203 c = getc(fi);
204 }
205 *cp = '\0';
206
207 /*
208 * By default, add space after the end of current input
209 * (normally end of line)
210 */
211 add_space = 1;
212
213 /*
214 * If the input line is longer than linebuf buffer can hold,
215 * process the data read so far as if it was a separate line -
216 * if there is any whitespace character in the read data,
217 * process all the data up to it, otherwise process all.
218 */
219 if (c != '\n' && c != EOF && !isspace(c)) {
220 /*
221 * Find out if any whitespace character has been read.
222 */
223 for(cp2 = cp; cp2 >= linebuf
224 && !isspace((unsigned char)*cp2); cp2--);
225
226 if (cp2 < linebuf) {
227 /*
228 * ungetc() last read character so that it
229 * won't get lost.
230 */
231 ungetc(c, fi);
232 /*
233 * Don't append space on the end in split().
234 */
235 add_space = 0;
236 } else {
237 /*
238 * To avoid splitting a word in a middle,
239 * ungetc() all characters after last
240 * whitespace char.
241 */
242 while (!isspace(c) && (cp >= linebuf)) {
243 ungetc(c, fi);
244 c = *--cp;
245 }
246 *cp = '\0';
247 }
248 }
249
250 /*
251 * Expand tabs on the way to canonb.
252 */
253 col = 0;
254 cp = linebuf;
255 cp2 = canonb;
256 while ((c = *cp++) != 0) {
257 if (c != '\t') {
258 col++;
259 if (cp2-canonb < BUFSIZ-1)
260 *cp2++ = c;
261 continue;
262 }
263 do {
264 if (cp2-canonb < BUFSIZ-1)
265 *cp2++ = ' ';
266 col++;
267 } while ((col & 07) != 0);
268 }
269
270 /*
271 * Swipe trailing blanks from the line.
272 */
273 for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
274 ;
275 *++cp2 = '\0';
276 prefix(canonb, add_space);
277 if (c != EOF)
278 c = getc(fi);
279 }
280 }
281
282 /*
283 * Take a line devoid of tabs and other garbage and determine its
284 * blank prefix. If the indent changes, call for a linebreak.
285 * If the input line is blank, echo the blank line on the output.
286 * Finally, if the line minus the prefix is a mail header, try to keep
287 * it on a line by itself.
288 */
289 static void
290 prefix(const char line[], int add_space)
291 {
292 const char *cp;
293 char **hp;
294 int np, h;
295
296 if (strlen(line) == 0) {
297 oflush();
298 putchar('\n');
299 return;
300 }
301 for (cp = line; *cp == ' '; cp++)
302 ;
303 np = cp - line;
304
305 /*
306 * The following horrible expression attempts to avoid linebreaks
307 * when the indent changes due to a paragraph.
308 */
309 if (np != pfx && (np > pfx || abs(pfx-np) > 8))
310 oflush();
311 if ((h = ishead(cp)) != 0)
312 oflush(), mark = lineno;
313 if (lineno - mark < 3 && lineno - mark > 0)
314 for (hp = &headnames[0]; *hp != (char *) 0; hp++)
315 if (ispref(*hp, cp)) {
316 h = 1;
317 oflush();
318 break;
319 }
320 if (!h && (h = (*cp == '.')))
321 oflush();
322 pfx = np;
323 if (h) {
324 pack(cp, strlen(cp));
325 oflush();
326 } else
327 split(cp, add_space);
328 lineno++;
329 }
330
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * line      - text to split, without leading blanks.
 * add_space - nonzero to append a blank (two after '.', ':' or '!')
 *             to the last word, so that text from the next input
 *             line does not run into it.
 *
 * The word buffer is BUFSIZ + 3 bytes: a single word may be as long
 * as a whole input line (BUFSIZ - 1 characters) and still needs room
 * for two trailing blanks and the terminating NUL.  Every store is
 * also bounds checked, so longer input is clipped rather than written
 * past the end of the buffer.
 */
static void
split(const char line[], int add_space)
{
	const char *cp;
	char *cp2;
	char word[BUFSIZ + 3];
	char *const wend = word + sizeof(word) - 1;	/* slot kept for NUL */
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp != '\0') {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash and the white space it escapes are
		 * both copied but count as one character of word length.
		 */
		while (*cp != '\0' && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1])) {
				if (cp2 < wend)
					*cp2++ = *cp;
				cp++;
			}
			if (cp2 < wend)
				*cp2++ = *cp;
			cp++;
			wordl++;	/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line.  Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0' && add_space) {
			if (cp2 < wend)
				*cp2++ = ' ';
			if (strchr(".:!", cp[-1]) != NULL && cp2 < wend)
				*cp2++ = ' ';
		}

		/* Attach the blanks that separate this word from the next. */
		while (*cp == ' ') {
			if (cp2 < wend)
				*cp2++ = *cp;
			cp++;
		}
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 -- the word length is passed along too.
		 */
		pack(word, wordl);
	}
}
379
380 /*
381 * Output section.
382 * Build up line images from the words passed in. Prefix
383 * each line with correct number of blanks. The buffer "outbuf"
384 * contains the current partial line image, including prefixed blanks.
385 * "outp" points to the next available space therein. When outp is NOSTR,
386 * there ain't nothing in there yet. At the bottom of this whole mess,
387 * leading tabs are reinserted.
388 */
389 char outbuf[BUFSIZ]; /* Sandbagged output line image */
390 char *outp; /* Pointer in above */
391
392 /*
393 * Initialize the output section.
394 */
395 static void
396 setout(void)
397 {
398 outp = NOSTR;
399 }
400
401 /*
402 * Pack a word onto the output line. If this is the beginning of
403 * the line, push on the appropriately-sized string of blanks first.
404 * If the word won't fit on the current line, flush and begin a new
405 * line. If the word is too long to fit all by itself on a line,
406 * just give it its own and hope for the best.
407 *
408 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
409 * goal length, take it. If not, then check to see if the line
410 * will be over the max length; if so put the word on the next
411 * line. If not, check to see if the line will be closer to the
412 * goal length with or without the word and take it or put it on
413 * the next line accordingly.
414 */
415
416 /*
417 * LIZ@UOM 6/18/85 -- pass in the length of the word as well
418 * pack(word)
419 * char word[];
420 */
421 static void
422 pack(const char word[], int wl)
423 {
424 const char *cp;
425 int s, t;
426
427 if (outp == NOSTR)
428 leadin();
429 /*
430 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
431 * length of the line before the word is added; t is now the length
432 * of the line after the word is added
433 * t = strlen(word);
434 * if (t+s <= LENGTH)
435 */
436 s = outp - outbuf;
437 t = wl + s;
438 if ((t <= goal_length) ||
439 ((t <= max_length) && (t - goal_length <= goal_length - s))) {
440 /*
441 * In like flint!
442 */
443 for (cp = word; *cp; *outp++ = *cp++);
444 return;
445 }
446 if (s > pfx) {
447 oflush();
448 leadin();
449 }
450 for (cp = word; *cp; *outp++ = *cp++);
451 }
452
453 /*
454 * If there is anything on the current output line, send it on
455 * its way. Set outp to NOSTR to indicate the absence of the current
456 * line prefix.
457 */
458 static void
459 oflush(void)
460 {
461 if (outp == NOSTR)
462 return;
463 *outp = '\0';
464 tabulate(outbuf);
465 outp = NOSTR;
466 }
467
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * line - NUL-terminated line image.  It is modified in place: trailing
 *        blanks are cut off.
 *
 * Each full group of 8 leading blanks is written as one tab; the
 * remaining leading blanks are written as they are, followed by the
 * rest of the line and a newline.
 */
static void
tabulate(char line[])
{
	const char *cp;
	size_t len;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.  Working with the
	 * length instead of a pointer keeps an empty or all-blank line
	 * from producing an address in front of the buffer.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		continue;
	b = cp - line;
	t = b >> 3;
	b &= 07;
	while (t-- > 0)
		putc('\t', stdout);
	while (b-- > 0)
		putc(' ', stdout);
	for (; *cp != '\0'; cp++)
		putc(*cp, stdout);
	putc('\n', stdout);
}
506
507 /*
508 * Initialize the output line with the appropriate number of
509 * leading blanks.
510 */
511 static void
512 leadin(void)
513 {
514 int b;
515 char *cp;
516
517 for (b = 0, cp = outbuf; b < pfx; b++)
518 *cp++ = ' ';
519 outp = cp;
520 }
521
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 if every character of s1 matches the character at the
 * same position in s2, 0 otherwise.  An empty s1 is a prefix of
 * anything.
 *
 * Both strings are stepped through together.  The comparison stops at
 * the first mismatch, which includes reaching the end of s2 before
 * the end of s1, so neither string is read past its terminating NUL.
 */
static int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
533