fmt.c revision 1.27 1 /* $NetBSD: fmt.c,v 1.27 2007/06/03 22:39:21 christos Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 The Regents of the University of California. All rights reserved.\n");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
41 #endif
42 __RCSID("$NetBSD: fmt.c,v 1.27 2007/06/03 22:39:21 christos Exp $");
43 #endif /* not lint */
44
45 #include <ctype.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <errno.h>
51 #include <err.h>
52 #include <limits.h>
53 #include <string.h>
54 #include "buffer.h"
55
56 /*
57 * fmt -- format the concatenation of input files or standard input
58 * onto standard output. Designed for use with Mail ~|
59 *
60 * Syntax : fmt [ goal [ max ] ] [ name ... ]
61 * Authors: Kurt Shoens (UCB) 12/7/78;
62 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
63 */
64
65 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
66 #define GOAL_LENGTH 65
67 #define MAX_LENGTH 75
68 static size_t goal_length; /* Target or goal line length in output */
69 static size_t max_length; /* Max line length in output */
70 static size_t pfx; /* Current leading blank count */
71 static int raw; /* Don't treat mail specially */
72 static int lineno; /* Current input line */
73 static int mark; /* Last place we saw a head line */
74 static int center;
75 static struct buffer outbuf;
76
77 static const char *headnames[] = {"To", "Subject", "Cc", 0};
78
static void	usage(void) __attribute__((__noreturn__));
static int	getnum(const char *str, const char *what, size_t *res,
		    int badnum);
static void	fmt(FILE *fi);
static int	ispref(const char *s1, const char *s2);
static void	leadin(void);
static void	oflush(void);
static void	pack(const char *word, size_t wlen);
static void	prefix(const struct buffer *buf, int add_space);
static void	split(const char *line, int add_space);
static void	tabulate(struct buffer *buf);

/* Not defined in the part of the file shown here. */
int		ishead(const char *);
92
93 /*
94 * Drive the whole formatter by managing input files. Also,
95 * cause initialization of the output stuff and flush it out
96 * at the end.
97 */
98
99 int
100 main(int argc, char **argv)
101 {
102 FILE *fi;
103 int errs = 0;
104 int compat = 1;
105 int c;
106
107 goal_length = GOAL_LENGTH;
108 max_length = MAX_LENGTH;
109 buf_init(&outbuf);
110 lineno = 1;
111 mark = -10;
112
113 setprogname(*argv);
114 (void)setlocale(LC_ALL, "");
115
116 while ((c = getopt(argc, argv, "Cg:m:r")) != -1)
117 switch (c) {
118 case 'C':
119 center++;
120 break;
121 case 'g':
122 (void)getnum(optarg, "goal", &goal_length, 1);
123 compat = 0;
124 break;
125 case 'm':
126 (void)getnum(optarg, "max", &max_length, 1);
127 compat = 0;
128 break;
129 case 'r':
130 raw++;
131 break;
132 default:
133 usage();
134 }
135
136 argc -= optind;
137 argv += optind;
138
139 /*
140 * compatibility with old usage.
141 */
142 if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
143 argv++;
144 argc--;
145 if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
146 argv++;
147 argc--;
148 }
149 }
150
151 if (max_length <= goal_length) {
152 errx(1, "Max length (%zu) must be greater than goal "
153 "length (%zu)", max_length, goal_length);
154 }
155 if (argc == 0) {
156 fmt(stdin);
157 oflush();
158 return 0;
159 }
160 while (argc--) {
161 if ((fi = fopen(*argv++, "r")) == NULL) {
162 warn("Cannot open `%s'", *argv);
163 errs++;
164 continue;
165 }
166 fmt(fi);
167 (void)fclose(fi);
168 }
169 oflush();
170 buf_end(&outbuf);
171 return errs;
172 }
173
/*
 * Write the command synopsis (both the option form and the old
 * positional form) to standard error and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
183
/*
 * Parse `str' as an unsigned number and store it in `*res'.  The base
 * is chosen by strtoul(3) with base 0, so a leading "0x" means
 * hexadecimal and a leading "0" means octal.
 *
 *	str	text to convert; the whole string must be consumed
 *	what	name of the value ("goal", "max"), used in diagnostics
 *	res	where the converted value is stored on success
 *	badnum	non-zero: a malformed number is a fatal error;
 *		zero: a malformed number just yields a return of 0, so
 *		the caller can treat the argument as something else
 *
 * Returns 1 if `*res' was set, 0 if `str' is not a number.  A number
 * that does not fit in a size_t is always a fatal error.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	const char *p;
	unsigned long ul;
	char *ep;

	/*
	 * strtoul(3) accepts a leading minus sign and returns the negated
	 * value as an unsigned number, which would turn "-1" into an
	 * enormous length.  Skip the white space strtoul would skip and
	 * treat signed input as not being a number at all.
	 */
	for (p = str; isspace((unsigned char)*p); p++)
		continue;

	if (*p != '-') {
		errno = 0;
		ul = strtoul(str, &ep, 0);
		if (*str != '\0' && *ep == '\0') {
			/*
			 * Out of range for unsigned long, or representable
			 * there but lost when narrowed to size_t.
			 */
			if ((errno == ERANGE && ul == ULONG_MAX) ||
			    (unsigned long)(size_t)ul != ul)
				errx(1, "%s number `%s' too big", what, str);
			*res = (size_t)ul;
			return 1;
		}
	}

	if (badnum)
		errx(1, "Bad %s number `%s'", what, str);

	return 0;
}
202
203 /*
204 * Read up characters from the passed input file, forming lines,
205 * doing ^H processing, expanding tabs, stripping trailing blanks,
206 * and sending each line down for analysis.
207 */
208 static void
209 fmt(FILE *fi)
210 {
211 struct buffer lbuf, cbuf;
212 char *cp, *cp2;
213 int c, add_space;
214 size_t len, col;
215
216 if (center) {
217 for (;;) {
218 cp = fgetln(fi, &len);
219 if (!cp)
220 return;
221 cp2 = cp + len - 1;
222 while (len-- && isspace((unsigned char)*cp))
223 cp++;
224 while (cp2 > cp && isspace((unsigned char)*cp2))
225 cp2--;
226 if (cp == cp2)
227 (void)putchar('\n');
228 col = cp2 - cp;
229 if (goal_length > col)
230 for (c = 0; c < (goal_length - col) / 2; c++)
231 (void)putchar(' ');
232 while (cp <= cp2)
233 (void)putchar(*cp++);
234 (void)putchar('\n');
235 }
236 }
237
238 buf_init(&lbuf);
239 buf_init(&cbuf);
240 c = getc(fi);
241
242 while (c != EOF) {
243 /*
244 * Collect a line, doing ^H processing.
245 * Leave tabs for now.
246 */
247 buf_reset(&lbuf);
248 while (c != '\n' && c != EOF) {
249 if (c == '\b') {
250 (void)buf_unputc(&lbuf);
251 c = getc(fi);
252 continue;
253 }
254 if(!(isprint(c) || c == '\t' || c >= 160)) {
255 c = getc(fi);
256 continue;
257 }
258 buf_putc(&lbuf, c);
259 c = getc(fi);
260 }
261 buf_putc(&lbuf, '\0');
262 (void)buf_unputc(&lbuf);
263 add_space = c != EOF;
264
265 /*
266 * Expand tabs on the way.
267 */
268 col = 0;
269 cp = lbuf.bptr;
270 buf_reset(&cbuf);
271 while ((c = *cp++) != '\0') {
272 if (c != '\t') {
273 col++;
274 buf_putc(&cbuf, c);
275 continue;
276 }
277 do {
278 buf_putc(&cbuf, ' ');
279 col++;
280 } while ((col & 07) != 0);
281 }
282
283 /*
284 * Swipe trailing blanks from the line.
285 */
286 for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
287 continue;
288 cbuf.ptr = cp2 + 1;
289 buf_putc(&cbuf, '\0');
290 (void)buf_unputc(&cbuf);
291 prefix(&cbuf, add_space);
292 if (c != EOF)
293 c = getc(fi);
294 }
295 buf_end(&cbuf);
296 buf_end(&lbuf);
297 }
298
299 /*
300 * Take a line devoid of tabs and other garbage and determine its
301 * blank prefix. If the indent changes, call for a linebreak.
302 * If the input line is blank, echo the blank line on the output.
303 * Finally, if the line minus the prefix is a mail header, try to keep
304 * it on a line by itself.
305 */
306 static void
307 prefix(const struct buffer *buf, int add_space)
308 {
309 const char *cp;
310 const char **hp;
311 size_t np;
312 int h;
313
314 if (buf->ptr == buf->bptr) {
315 oflush();
316 (void)putchar('\n');
317 return;
318 }
319 for (cp = buf->bptr; *cp == ' '; cp++)
320 continue;
321 np = cp - buf->bptr;
322
323 /*
324 * The following horrible expression attempts to avoid linebreaks
325 * when the indent changes due to a paragraph.
326 */
327 if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
328 oflush();
329 if (!raw) {
330 if ((h = ishead(cp)) != 0) {
331 oflush();
332 mark = lineno;
333 }
334 if (lineno - mark < 3 && lineno - mark > 0)
335 for (hp = &headnames[0]; *hp != NULL; hp++)
336 if (ispref(*hp, cp)) {
337 h = 1;
338 oflush();
339 break;
340 }
341 if (!h && (h = (*cp == '.')))
342 oflush();
343 } else
344 h = 0;
345 pfx = np;
346 if (h) {
347 pack(cp, (size_t)(buf->ptr - cp));
348 oflush();
349 } else
350 split(cp, add_space);
351 lineno++;
352 }
353
354 /*
355 * Split up the passed line into output "words" which are
356 * maximal strings of non-blanks with the blank separation
357 * attached at the end. Pass these words along to the output
358 * line packer.
359 */
360 static void
361 split(const char line[], int add_space)
362 {
363 const char *cp;
364 struct buffer word;
365 size_t wlen;
366
367 buf_init(&word);
368 cp = line;
369 while (*cp) {
370 buf_reset(&word);
371 wlen = 0;
372
373 /*
374 * Collect a 'word,' allowing it to contain escaped white
375 * space.
376 */
377 while (*cp && *cp != ' ') {
378 if (*cp == '\\' && isspace((unsigned char)cp[1]))
379 buf_putc(&word, *cp++);
380 buf_putc(&word, *cp++);
381 wlen++;
382 }
383
384 /*
385 * Guarantee a space at end of line. Two spaces after end of
386 * sentence punctuation.
387 */
388 if (*cp == '\0' && add_space) {
389 buf_putc(&word, ' ');
390 if (strchr(".:!", cp[-1]))
391 buf_putc(&word, ' ');
392 }
393 while (*cp == ' ')
394 buf_putc(&word, *cp++);
395
396 buf_putc(&word, '\0');
397 (void)buf_unputc(&word);
398
399 pack(word.bptr, wlen);
400 }
401 buf_end(&word);
402 }
403
404 /*
405 * Output section.
406 * Build up line images from the words passed in. Prefix
407 * each line with correct number of blanks.
408 *
409 * At the bottom of this whole mess, leading tabs are reinserted.
410 */
411
412 /*
413 * Pack a word onto the output line. If this is the beginning of
414 * the line, push on the appropriately-sized string of blanks first.
415 * If the word won't fit on the current line, flush and begin a new
416 * line. If the word is too long to fit all by itself on a line,
417 * just give it its own and hope for the best.
418 *
419 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
420 * goal length, take it. If not, then check to see if the line
421 * will be over the max length; if so put the word on the next
422 * line. If not, check to see if the line will be closer to the
423 * goal length with or without the word and take it or put it on
424 * the next line accordingly.
425 */
426
427 static void
428 pack(const char *word, size_t wlen)
429 {
430 const char *cp;
431 size_t s, t;
432
433 if (outbuf.bptr == outbuf.ptr)
434 leadin();
435 /*
436 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
437 * length of the line before the word is added; t is now the length
438 * of the line after the word is added
439 */
440 s = outbuf.ptr - outbuf.bptr;
441 t = wlen + s;
442 if ((t <= goal_length) || ((t <= max_length) &&
443 (s <= goal_length) && (t - goal_length <= goal_length - s))) {
444 /*
445 * In like flint!
446 */
447 for (cp = word; *cp;)
448 buf_putc(&outbuf, *cp++);
449 return;
450 }
451 if (s > pfx) {
452 oflush();
453 leadin();
454 }
455 for (cp = word; *cp;)
456 buf_putc(&outbuf, *cp++);
457 }
458
459 /*
460 * If there is anything on the current output line, send it on
461 * its way. Reset outbuf.
462 */
463 static void
464 oflush(void)
465 {
466 if (outbuf.bptr == outbuf.ptr)
467 return;
468 buf_putc(&outbuf, '\0');
469 (void)buf_unputc(&outbuf);
470 tabulate(&outbuf);
471 buf_reset(&outbuf);
472 }
473
474 /*
475 * Take the passed line buffer, insert leading tabs where possible, and
476 * output on standard output (finally).
477 */
478 static void
479 tabulate(struct buffer *buf)
480 {
481 char *cp;
482 size_t b, t;
483
484 /*
485 * Toss trailing blanks in the output line.
486 */
487 for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
488 continue;
489 *++cp = '\0';
490
491 /*
492 * Count the leading blank space and tabulate.
493 */
494 for (cp = buf->bptr; *cp == ' '; cp++)
495 continue;
496 b = cp - buf->bptr;
497 t = b / 8;
498 b = b % 8;
499 if (t > 0)
500 do
501 (void)putchar('\t');
502 while (--t);
503 if (b > 0)
504 do
505 (void)putchar(' ');
506 while (--b);
507 while (*cp)
508 (void)putchar(*cp++);
509 (void)putchar('\n');
510 }
511
512 /*
513 * Initialize the output line with the appropriate number of
514 * leading blanks.
515 */
516 static void
517 leadin(void)
518 {
519 size_t b;
520
521 buf_reset(&outbuf);
522
523 for (b = 0; b < pfx; b++)
524 buf_putc(&outbuf, ' ');
525 }
526
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 if every character of s1 matches the character at the same
 * position in s2, 0 otherwise.  The empty string is a prefix of
 * everything.
 */
static int
ispref(const char *s1, const char *s2)
{

	/*
	 * Walk both strings together.  Reaching the end of s2 first is
	 * caught by the comparison, since its NUL differs from the
	 * character still pending in s1.
	 */
	for (; *s1 != '\0'; s1++, s2++)
		if (*s1 != *s2)
			return 0;
	return 1;
}
538