fmt.c revision 1.24 1 /* $NetBSD: fmt.c,v 1.24 2006/01/15 14:26:10 christos Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 The Regents of the University of California. All rights reserved.\n");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
41 #endif
42 __RCSID("$NetBSD: fmt.c,v 1.24 2006/01/15 14:26:10 christos Exp $");
43 #endif /* not lint */
44
#include <ctype.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "buffer.h"
51
52 /*
53 * fmt -- format the concatenation of input files or standard input
54 * onto standard output. Designed for use with Mail ~|
55 *
 * Syntax : fmt [ -C ] [ goal [ max ] ] [ name ... ]
57 * Authors: Kurt Shoens (UCB) 12/7/78;
58 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
59 */
60
61 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
62 #define GOAL_LENGTH 65
63 #define MAX_LENGTH 75
64 static size_t goal_length; /* Target or goal line length in output */
65 static size_t max_length; /* Max line length in output */
66 static size_t pfx; /* Current leading blank count */
67 static int lineno; /* Current input line */
68 static int mark; /* Last place we saw a head line */
69 static int center;
70 static struct buffer outbuf;
71
72 static const char *headnames[] = {"To", "Subject", "Cc", 0};
73
74 static void fmt(FILE *);
75 static int ispref(const char *, const char *);
76 static void leadin(void);
77 static void oflush(void);
78 static void pack(const char *, size_t);
79 static void prefix(const struct buffer *, int);
80 static void split(const char *, int);
81 static void tabulate(struct buffer *);
82
83
84 int ishead(const char *);
85
86 /*
87 * Drive the whole formatter by managing input files. Also,
88 * cause initialization of the output stuff and flush it out
89 * at the end.
90 */
91
92 int
93 main(int argc, char **argv)
94 {
95 FILE *fi;
96 int errs = 0;
97 int number; /* LIZ@UOM 6/18/85 */
98
99 goal_length = GOAL_LENGTH;
100 max_length = MAX_LENGTH;
101 buf_init(&outbuf);
102 lineno = 1;
103 mark = -10;
104
105 setprogname(*argv);
106 (void)setlocale(LC_ALL, "");
107
108 /*
109 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
110 */
111 if (argc > 1 && !strcmp(argv[1], "-C")) {
112 center++;
113 argc--;
114 argv++;
115 }
116 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
117 argv++;
118 argc--;
119 goal_length = abs(number);
120 if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
121 argv++;
122 argc--;
123 max_length = abs(number);
124 }
125 }
126 if (max_length <= goal_length) {
127 errx(1, "Max length (%zu) must be greater than goal "
128 "length (%zu)", max_length, goal_length);
129 }
130 if (argc < 2) {
131 fmt(stdin);
132 oflush();
133 return 0;
134 }
135 while (--argc) {
136 if ((fi = fopen(*++argv, "r")) == NULL) {
137 warn("Cannot open `%s'", *argv);
138 errs++;
139 continue;
140 }
141 fmt(fi);
142 (void)fclose(fi);
143 }
144 oflush();
145 buf_end(&outbuf);
146 return errs;
147 }
148
149 /*
150 * Read up characters from the passed input file, forming lines,
151 * doing ^H processing, expanding tabs, stripping trailing blanks,
152 * and sending each line down for analysis.
153 */
154 static void
155 fmt(FILE *fi)
156 {
157 struct buffer lbuf, cbuf;
158 char *cp, *cp2;
159 int c, add_space;
160 size_t len, col;
161
162 if (center) {
163 for (;;) {
164 cp = fgetln(fi, &len);
165 if (!cp)
166 return;
167 cp2 = cp + len - 1;
168 while (len-- && isspace((unsigned char)*cp))
169 cp++;
170 while (cp2 > cp && isspace((unsigned char)*cp2))
171 cp2--;
172 if (cp == cp2)
173 (void)putchar('\n');
174 col = cp2 - cp;
175 if (goal_length > col)
176 for (c = 0; c < (goal_length - col) / 2; c++)
177 (void)putchar(' ');
178 while (cp <= cp2)
179 (void)putchar(*cp++);
180 (void)putchar('\n');
181 }
182 }
183
184 buf_init(&lbuf);
185 buf_init(&cbuf);
186 c = getc(fi);
187
188 while (c != EOF) {
189 /*
190 * Collect a line, doing ^H processing.
191 * Leave tabs for now.
192 */
193 buf_reset(&lbuf);
194 while (c != '\n' && c != EOF) {
195 if (c == '\b') {
196 (void)buf_unputc(&lbuf);
197 c = getc(fi);
198 continue;
199 }
200 if(!(isprint(c) || c == '\t' || c >= 160)) {
201 c = getc(fi);
202 continue;
203 }
204 buf_putc(&lbuf, c);
205 c = getc(fi);
206 }
207 buf_putc(&lbuf, '\0');
208 (void)buf_unputc(&lbuf);
209 add_space = c != EOF;
210
211 /*
212 * Expand tabs on the way.
213 */
214 col = 0;
215 cp = lbuf.bptr;
216 buf_reset(&cbuf);
217 while ((c = *cp++) != '\0') {
218 if (c != '\t') {
219 col++;
220 buf_putc(&cbuf, c);
221 continue;
222 }
223 do {
224 buf_putc(&cbuf, ' ');
225 col++;
226 } while ((col & 07) != 0);
227 }
228
229 /*
230 * Swipe trailing blanks from the line.
231 */
232 for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
233 continue;
234 cbuf.ptr = cp2 + 1;
235 buf_putc(&cbuf, '\0');
236 (void)buf_unputc(&cbuf);
237 prefix(&cbuf, add_space);
238 if (c != EOF)
239 c = getc(fi);
240 }
241 buf_end(&cbuf);
242 buf_end(&lbuf);
243 }
244
245 /*
246 * Take a line devoid of tabs and other garbage and determine its
247 * blank prefix. If the indent changes, call for a linebreak.
248 * If the input line is blank, echo the blank line on the output.
249 * Finally, if the line minus the prefix is a mail header, try to keep
250 * it on a line by itself.
251 */
252 static void
253 prefix(const struct buffer *buf, int add_space)
254 {
255 const char *cp;
256 const char **hp;
257 size_t np;
258 int h;
259
260 if (buf->ptr == buf->bptr) {
261 oflush();
262 (void)putchar('\n');
263 return;
264 }
265 for (cp = buf->bptr; *cp == ' '; cp++)
266 continue;
267 np = cp - buf->bptr;
268
269 /*
270 * The following horrible expression attempts to avoid linebreaks
271 * when the indent changes due to a paragraph.
272 */
273 if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
274 oflush();
275 if ((h = ishead(cp)) != 0) {
276 oflush();
277 mark = lineno;
278 }
279 if (lineno - mark < 3 && lineno - mark > 0)
280 for (hp = &headnames[0]; *hp != NULL; hp++)
281 if (ispref(*hp, cp)) {
282 h = 1;
283 oflush();
284 break;
285 }
286 if (!h && (h = (*cp == '.')))
287 oflush();
288 pfx = np;
289 if (h) {
290 pack(cp, (size_t)(buf->ptr - cp));
291 oflush();
292 } else
293 split(cp, add_space);
294 lineno++;
295 }
296
297 /*
298 * Split up the passed line into output "words" which are
299 * maximal strings of non-blanks with the blank separation
300 * attached at the end. Pass these words along to the output
301 * line packer.
302 */
303 static void
304 split(const char line[], int add_space)
305 {
306 const char *cp;
307 struct buffer word;
308 size_t wlen;
309
310 buf_init(&word);
311 cp = line;
312 while (*cp) {
313 buf_reset(&word);
314 wlen = 0;
315
316 /*
317 * Collect a 'word,' allowing it to contain escaped white
318 * space.
319 */
320 while (*cp && *cp != ' ') {
321 if (*cp == '\\' && isspace((unsigned char)cp[1]))
322 buf_putc(&word, *cp++);
323 buf_putc(&word, *cp++);
324 wlen++;
325 }
326
327 /*
328 * Guarantee a space at end of line. Two spaces after end of
329 * sentence punctuation.
330 */
331 if (*cp == '\0' && add_space) {
332 buf_putc(&word, ' ');
333 if (strchr(".:!", cp[-1]))
334 buf_putc(&word, ' ');
335 }
336 while (*cp == ' ')
337 buf_putc(&word, *cp++);
338
339 buf_putc(&word, '\0');
340 (void)buf_unputc(&word);
341
342 pack(word.bptr, wlen);
343 }
344 buf_end(&word);
345 }
346
347 /*
348 * Output section.
349 * Build up line images from the words passed in. Prefix
350 * each line with correct number of blanks.
351 *
352 * At the bottom of this whole mess, leading tabs are reinserted.
353 */
354
355 /*
356 * Pack a word onto the output line. If this is the beginning of
357 * the line, push on the appropriately-sized string of blanks first.
358 * If the word won't fit on the current line, flush and begin a new
359 * line. If the word is too long to fit all by itself on a line,
360 * just give it its own and hope for the best.
361 *
362 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
363 * goal length, take it. If not, then check to see if the line
364 * will be over the max length; if so put the word on the next
365 * line. If not, check to see if the line will be closer to the
366 * goal length with or without the word and take it or put it on
367 * the next line accordingly.
368 */
369
370 static void
371 pack(const char *word, size_t wlen)
372 {
373 const char *cp;
374 size_t s, t;
375
376 if (outbuf.bptr == outbuf.ptr)
377 leadin();
378 /*
379 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
380 * length of the line before the word is added; t is now the length
381 * of the line after the word is added
382 */
383 s = outbuf.ptr - outbuf.bptr;
384 t = wlen + s;
385 if ((t <= goal_length) || ((t <= max_length) &&
386 (s <= goal_length) && (t - goal_length <= goal_length - s))) {
387 /*
388 * In like flint!
389 */
390 for (cp = word; *cp;)
391 buf_putc(&outbuf, *cp++);
392 return;
393 }
394 if (s > pfx) {
395 oflush();
396 leadin();
397 }
398 for (cp = word; *cp;)
399 buf_putc(&outbuf, *cp++);
400 }
401
402 /*
403 * If there is anything on the current output line, send it on
404 * its way. Reset outbuf.
405 */
406 static void
407 oflush(void)
408 {
409 if (outbuf.bptr == outbuf.ptr)
410 return;
411 buf_putc(&outbuf, '\0');
412 (void)buf_unputc(&outbuf);
413 tabulate(&outbuf);
414 buf_reset(&outbuf);
415 }
416
417 /*
418 * Take the passed line buffer, insert leading tabs where possible, and
419 * output on standard output (finally).
420 */
421 static void
422 tabulate(struct buffer *buf)
423 {
424 char *cp;
425 size_t b, t;
426
427 /*
428 * Toss trailing blanks in the output line.
429 */
430 for (cp = buf->ptr; cp >= buf->bptr && *cp == ' '; cp--)
431 continue;
432 *cp = '\0';
433
434 /*
435 * Count the leading blank space and tabulate.
436 */
437 for (cp = buf->bptr; *cp == ' '; cp++)
438 continue;
439 b = cp - buf->bptr;
440 t = b / 8;
441 b = b % 8;
442 if (t > 0)
443 do
444 (void)putchar('\t');
445 while (--t);
446 if (b > 0)
447 do
448 (void)putchar(' ');
449 while (--b);
450 while (*cp)
451 (void)putchar(*cp++);
452 (void)putchar('\n');
453 }
454
455 /*
456 * Initialize the output line with the appropriate number of
457 * leading blanks.
458 */
459 static void
460 leadin(void)
461 {
462 size_t b;
463
464 buf_reset(&outbuf);
465
466 for (b = 0; b < pfx; b++)
467 buf_putc(&outbuf, ' ');
468 }
469
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so the empty string is a prefix of anything),
 * and 0 otherwise, including when s2 is shorter than s1.
 */
static int
ispref(const char *s1, const char *s2)
{

	/* strncmp stops at the end of s2, so neither string is overrun. */
	return strncmp(s2, s1, strlen(s1)) == 0;
}
481