fmt.c revision 1.22 1 1.22 christos /* $NetBSD: fmt.c,v 1.22 2006/01/04 20:44:57 christos Exp $ */
2 1.4 jtc
3 1.1 cgd /*
4 1.4 jtc * Copyright (c) 1980, 1993
5 1.4 jtc * The Regents of the University of California. All rights reserved.
6 1.1 cgd *
7 1.1 cgd * Redistribution and use in source and binary forms, with or without
8 1.1 cgd * modification, are permitted provided that the following conditions
9 1.1 cgd * are met:
10 1.1 cgd * 1. Redistributions of source code must retain the above copyright
11 1.1 cgd * notice, this list of conditions and the following disclaimer.
12 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 cgd * notice, this list of conditions and the following disclaimer in the
14 1.1 cgd * documentation and/or other materials provided with the distribution.
15 1.17 agc * 3. Neither the name of the University nor the names of its contributors
16 1.1 cgd * may be used to endorse or promote products derived from this software
17 1.1 cgd * without specific prior written permission.
18 1.1 cgd *
19 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.1 cgd * SUCH DAMAGE.
30 1.1 cgd */
31 1.1 cgd
32 1.6 lukem #include <sys/cdefs.h>
33 1.1 cgd #ifndef lint
34 1.6 lukem __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 1.6 lukem The Regents of the University of California. All rights reserved.\n");
36 1.1 cgd #endif /* not lint */
37 1.1 cgd
38 1.1 cgd #ifndef lint
39 1.4 jtc #if 0
40 1.4 jtc static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
41 1.4 jtc #endif
42 1.22 christos __RCSID("$NetBSD: fmt.c,v 1.22 2006/01/04 20:44:57 christos Exp $");
43 1.1 cgd #endif /* not lint */
44 1.1 cgd
45 1.16 wiz #include <ctype.h>
46 1.16 wiz #include <locale.h>
47 1.1 cgd #include <stdio.h>
48 1.3 cgd #include <stdlib.h>
49 1.3 cgd #include <string.h>
50 1.19 christos #include "buffer.h"
51 1.1 cgd
52 1.1 cgd /*
53 1.1 cgd * fmt -- format the concatenation of input files or standard input
54 1.1 cgd * onto standard output. Designed for use with Mail ~|
55 1.1 cgd *
56 1.1 cgd * Syntax : fmt [ goal [ max ] ] [ name ... ]
57 1.1 cgd * Authors: Kurt Shoens (UCB) 12/7/78;
58 1.1 cgd * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
59 1.1 cgd */
60 1.1 cgd
61 1.1 cgd /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
62 1.1 cgd #define GOAL_LENGTH 65
63 1.1 cgd #define MAX_LENGTH 75
64 1.19 christos static size_t goal_length; /* Target or goal line length in output */
65 1.19 christos static size_t max_length; /* Max line length in output */
66 1.19 christos static size_t pfx; /* Current leading blank count */
67 1.19 christos static size_t lineno; /* Current input line */
68 1.19 christos static size_t mark; /* Last place we saw a head line */
69 1.19 christos static int center;
70 1.19 christos static struct buffer outbuf;
71 1.1 cgd
72 1.19 christos static const char *headnames[] = {"To", "Subject", "Cc", 0};
73 1.1 cgd
74 1.16 wiz static void fmt(FILE *);
75 1.16 wiz static int ispref(const char *, const char *);
76 1.16 wiz static void leadin(void);
77 1.16 wiz static void oflush(void);
78 1.19 christos static void pack(const char *, size_t);
79 1.19 christos static void prefix(const struct buffer *, int);
80 1.16 wiz static void split(const char *, int);
81 1.19 christos static void tabulate(struct buffer *);
82 1.10 jdolecek
83 1.19 christos
84 1.19 christos int ishead(const char *);
85 1.6 lukem
86 1.1 cgd /*
87 1.1 cgd * Drive the whole formatter by managing input files. Also,
88 1.1 cgd * cause initialization of the output stuff and flush it out
89 1.1 cgd * at the end.
90 1.1 cgd */
91 1.1 cgd
92 1.6 lukem int
93 1.16 wiz main(int argc, char **argv)
94 1.1 cgd {
95 1.6 lukem FILE *fi;
96 1.6 lukem int errs = 0;
97 1.1 cgd int number; /* LIZ@UOM 6/18/85 */
98 1.1 cgd
99 1.1 cgd goal_length = GOAL_LENGTH;
100 1.1 cgd max_length = MAX_LENGTH;
101 1.19 christos buf_init(&outbuf);
102 1.1 cgd lineno = 1;
103 1.19 christos mark = ~0U;
104 1.5 kleink
105 1.19 christos setprogname(*argv);
106 1.5 kleink setlocale(LC_ALL, "");
107 1.5 kleink
108 1.1 cgd /*
109 1.1 cgd * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
110 1.1 cgd */
111 1.13 abs if (argc > 1 && !strcmp(argv[1], "-C")) {
112 1.12 abs center++;
113 1.12 abs argc--;
114 1.12 abs argv++;
115 1.12 abs }
116 1.1 cgd if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
117 1.1 cgd argv++;
118 1.1 cgd argc--;
119 1.8 ross goal_length = abs(number);
120 1.1 cgd if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
121 1.1 cgd argv++;
122 1.1 cgd argc--;
123 1.8 ross max_length = abs(number);
124 1.1 cgd }
125 1.1 cgd }
126 1.1 cgd if (max_length <= goal_length) {
127 1.19 christos errx(1, "Max length (%zu) must be greater than goal "
128 1.19 christos "length (%zu)", max_length, goal_length);
129 1.1 cgd }
130 1.1 cgd if (argc < 2) {
131 1.1 cgd fmt(stdin);
132 1.1 cgd oflush();
133 1.19 christos return 0;
134 1.1 cgd }
135 1.1 cgd while (--argc) {
136 1.1 cgd if ((fi = fopen(*++argv, "r")) == NULL) {
137 1.19 christos warn("Cannot open `%s'", *argv);
138 1.1 cgd errs++;
139 1.1 cgd continue;
140 1.1 cgd }
141 1.1 cgd fmt(fi);
142 1.1 cgd fclose(fi);
143 1.1 cgd }
144 1.1 cgd oflush();
145 1.19 christos buf_end(&outbuf);
146 1.19 christos return errs;
147 1.1 cgd }
148 1.1 cgd
149 1.1 cgd /*
150 1.1 cgd * Read up characters from the passed input file, forming lines,
151 1.1 cgd * doing ^H processing, expanding tabs, stripping trailing blanks,
152 1.1 cgd * and sending each line down for analysis.
153 1.1 cgd */
154 1.10 jdolecek static void
155 1.16 wiz fmt(FILE *fi)
156 1.1 cgd {
157 1.19 christos struct buffer lbuf, cbuf;
158 1.6 lukem char *cp, *cp2;
159 1.19 christos int c, add_space;
160 1.19 christos size_t len, col;
161 1.1 cgd
162 1.12 abs if (center) {
163 1.19 christos for (;;) {
164 1.19 christos cp = fgetln(fi, &len);
165 1.12 abs if (!cp)
166 1.12 abs return;
167 1.19 christos cp2 = cp + len - 1;
168 1.19 christos while (len-- && isspace((unsigned char)*cp))
169 1.12 abs cp++;
170 1.18 dsl while (cp2 > cp && isspace((unsigned char)*cp2))
171 1.12 abs cp2--;
172 1.12 abs if (cp == cp2)
173 1.21 christos putchar('\n');
174 1.12 abs col = cp2 - cp;
175 1.19 christos if (goal_length > col)
176 1.19 christos for (c = 0; c < (goal_length - col) / 2; c++)
177 1.21 christos putchar(' ');
178 1.12 abs while (cp <= cp2)
179 1.21 christos putchar(*cp++);
180 1.12 abs putchar('\n');
181 1.12 abs }
182 1.12 abs }
183 1.19 christos
184 1.19 christos buf_init(&lbuf);
185 1.19 christos buf_init(&cbuf);
186 1.1 cgd c = getc(fi);
187 1.19 christos
188 1.1 cgd while (c != EOF) {
189 1.1 cgd /*
190 1.1 cgd * Collect a line, doing ^H processing.
191 1.1 cgd * Leave tabs for now.
192 1.1 cgd */
193 1.19 christos buf_reset(&lbuf);
194 1.19 christos while (c != '\n' && c != EOF) {
195 1.1 cgd if (c == '\b') {
196 1.19 christos buf_unputc(&lbuf);
197 1.1 cgd c = getc(fi);
198 1.1 cgd continue;
199 1.1 cgd }
200 1.15 abs if(!(isprint(c) || c == '\t' || c >= 160)) {
201 1.1 cgd c = getc(fi);
202 1.1 cgd continue;
203 1.1 cgd }
204 1.19 christos buf_putc(&lbuf, c);
205 1.1 cgd c = getc(fi);
206 1.1 cgd }
207 1.19 christos buf_putc(&lbuf, '\0');
208 1.19 christos buf_unputc(&lbuf);
209 1.19 christos add_space = c != EOF;
210 1.10 jdolecek
211 1.10 jdolecek /*
212 1.19 christos * Expand tabs on the way.
213 1.1 cgd */
214 1.1 cgd col = 0;
215 1.19 christos cp = lbuf.bptr;
216 1.19 christos buf_reset(&cbuf);
217 1.19 christos while ((c = *cp++) != '\0') {
218 1.1 cgd if (c != '\t') {
219 1.1 cgd col++;
220 1.19 christos buf_putc(&cbuf, c);
221 1.1 cgd continue;
222 1.1 cgd }
223 1.1 cgd do {
224 1.19 christos buf_putc(&cbuf, ' ');
225 1.1 cgd col++;
226 1.1 cgd } while ((col & 07) != 0);
227 1.1 cgd }
228 1.1 cgd
229 1.1 cgd /*
230 1.1 cgd * Swipe trailing blanks from the line.
231 1.1 cgd */
232 1.19 christos for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
233 1.19 christos continue;
234 1.19 christos cbuf.ptr = cp2 + 1;
235 1.19 christos buf_putc(&cbuf, '\0');
236 1.19 christos buf_unputc(&cbuf);
237 1.19 christos prefix(&cbuf, add_space);
238 1.1 cgd if (c != EOF)
239 1.1 cgd c = getc(fi);
240 1.1 cgd }
241 1.19 christos buf_end(&cbuf);
242 1.19 christos buf_end(&lbuf);
243 1.1 cgd }
244 1.1 cgd
245 1.1 cgd /*
246 1.1 cgd * Take a line devoid of tabs and other garbage and determine its
247 1.1 cgd * blank prefix. If the indent changes, call for a linebreak.
248 1.1 cgd * If the input line is blank, echo the blank line on the output.
249 1.1 cgd * Finally, if the line minus the prefix is a mail header, try to keep
250 1.1 cgd * it on a line by itself.
251 1.1 cgd */
252 1.10 jdolecek static void
253 1.19 christos prefix(const struct buffer *buf, int add_space)
254 1.1 cgd {
255 1.10 jdolecek const char *cp;
256 1.19 christos const char **hp;
257 1.19 christos size_t np;
258 1.19 christos int h;
259 1.1 cgd
260 1.19 christos if (buf->ptr == buf->bptr) {
261 1.1 cgd oflush();
262 1.1 cgd putchar('\n');
263 1.1 cgd return;
264 1.1 cgd }
265 1.19 christos for (cp = buf->bptr; *cp == ' '; cp++)
266 1.19 christos continue;
267 1.19 christos np = cp - buf->bptr;
268 1.1 cgd
269 1.1 cgd /*
270 1.1 cgd * The following horrible expression attempts to avoid linebreaks
271 1.1 cgd * when the indent changes due to a paragraph.
272 1.1 cgd */
273 1.19 christos if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
274 1.19 christos oflush();
275 1.19 christos if ((h = ishead(cp)) != 0) {
276 1.1 cgd oflush();
277 1.19 christos mark = lineno;
278 1.19 christos }
279 1.19 christos if (mark == ~0U || (lineno - mark < 3 && lineno - mark > 0))
280 1.19 christos for (hp = &headnames[0]; *hp != NULL; hp++)
281 1.1 cgd if (ispref(*hp, cp)) {
282 1.1 cgd h = 1;
283 1.1 cgd oflush();
284 1.1 cgd break;
285 1.1 cgd }
286 1.1 cgd if (!h && (h = (*cp == '.')))
287 1.1 cgd oflush();
288 1.1 cgd pfx = np;
289 1.10 jdolecek if (h) {
290 1.19 christos pack(cp, (size_t)(buf->ptr - cp));
291 1.1 cgd oflush();
292 1.10 jdolecek } else
293 1.10 jdolecek split(cp, add_space);
294 1.1 cgd lineno++;
295 1.1 cgd }
296 1.1 cgd
297 1.1 cgd /*
298 1.1 cgd * Split up the passed line into output "words" which are
299 1.1 cgd * maximal strings of non-blanks with the blank separation
300 1.1 cgd * attached at the end. Pass these words along to the output
301 1.1 cgd * line packer.
302 1.1 cgd */
303 1.10 jdolecek static void
304 1.16 wiz split(const char line[], int add_space)
305 1.1 cgd {
306 1.10 jdolecek const char *cp;
307 1.19 christos struct buffer word;
308 1.19 christos size_t wlen;
309 1.1 cgd
310 1.19 christos buf_init(&word);
311 1.1 cgd cp = line;
312 1.1 cgd while (*cp) {
313 1.19 christos buf_reset(&word);
314 1.19 christos wlen = 0;
315 1.1 cgd
316 1.1 cgd /*
317 1.1 cgd * Collect a 'word,' allowing it to contain escaped white
318 1.1 cgd * space.
319 1.1 cgd */
320 1.1 cgd while (*cp && *cp != ' ') {
321 1.9 christos if (*cp == '\\' && isspace((unsigned char)cp[1]))
322 1.19 christos buf_putc(&word, *cp++);
323 1.19 christos buf_putc(&word, *cp++);
324 1.19 christos wlen++;
325 1.1 cgd }
326 1.1 cgd
327 1.1 cgd /*
328 1.1 cgd * Guarantee a space at end of line. Two spaces after end of
329 1.1 cgd * sentence punctuation.
330 1.1 cgd */
331 1.10 jdolecek if (*cp == '\0' && add_space) {
332 1.19 christos buf_putc(&word, ' ');
333 1.7 lukem if (strchr(".:!", cp[-1]))
334 1.19 christos buf_putc(&word, ' ');
335 1.1 cgd }
336 1.1 cgd while (*cp == ' ')
337 1.19 christos buf_putc(&word, *cp++);
338 1.19 christos
339 1.19 christos buf_putc(&word, '\0');
340 1.19 christos buf_unputc(&word);
341 1.19 christos
342 1.19 christos pack(word.bptr, wlen);
343 1.1 cgd }
344 1.19 christos buf_end(&word);
345 1.1 cgd }
346 1.1 cgd
347 1.1 cgd /*
348 1.1 cgd * Output section.
349 1.1 cgd * Build up line images from the words passed in. Prefix
350 1.20 christos * each line with correct number of blanks.
351 1.20 christos *
352 1.20 christos * At the bottom of this whole mess, leading tabs are reinserted.
353 1.1 cgd */
354 1.1 cgd
355 1.1 cgd /*
356 1.1 cgd * Pack a word onto the output line. If this is the beginning of
357 1.1 cgd * the line, push on the appropriately-sized string of blanks first.
358 1.1 cgd * If the word won't fit on the current line, flush and begin a new
359 1.1 cgd * line. If the word is too long to fit all by itself on a line,
360 1.1 cgd * just give it its own and hope for the best.
361 1.1 cgd *
362 1.1 cgd * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
363 1.1 cgd * goal length, take it. If not, then check to see if the line
364 1.1 cgd * will be over the max length; if so put the word on the next
365 1.1 cgd * line. If not, check to see if the line will be closer to the
366 1.1 cgd * goal length with or without the word and take it or put it on
367 1.1 cgd * the next line accordingly.
368 1.1 cgd */
369 1.1 cgd
370 1.10 jdolecek static void
371 1.19 christos pack(const char *word, size_t wlen)
372 1.1 cgd {
373 1.10 jdolecek const char *cp;
374 1.19 christos size_t s, t;
375 1.1 cgd
376 1.19 christos if (outbuf.bptr == outbuf.ptr)
377 1.1 cgd leadin();
378 1.1 cgd /*
379 1.1 cgd * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
380 1.1 cgd * length of the line before the word is added; t is now the length
381 1.1 cgd * of the line after the word is added
382 1.1 cgd */
383 1.19 christos s = outbuf.ptr - outbuf.bptr;
384 1.19 christos t = wlen + s;
385 1.22 christos if ((t <= goal_length) || ((t <= max_length) &&
386 1.22 christos (s <= goal_length) && (t - goal_length <= goal_length - s))) {
387 1.1 cgd /*
388 1.1 cgd * In like flint!
389 1.1 cgd */
390 1.19 christos for (cp = word; *cp;)
391 1.19 christos buf_putc(&outbuf, *cp++);
392 1.1 cgd return;
393 1.1 cgd }
394 1.1 cgd if (s > pfx) {
395 1.1 cgd oflush();
396 1.1 cgd leadin();
397 1.1 cgd }
398 1.19 christos for (cp = word; *cp;)
399 1.19 christos buf_putc(&outbuf, *cp++);
400 1.1 cgd }
401 1.1 cgd
402 1.1 cgd /*
403 1.1 cgd * If there is anything on the current output line, send it on
404 1.20 christos * its way. Reset outbuf.
405 1.1 cgd */
406 1.10 jdolecek static void
407 1.16 wiz oflush(void)
408 1.1 cgd {
409 1.19 christos if (outbuf.bptr == outbuf.ptr)
410 1.1 cgd return;
411 1.19 christos buf_putc(&outbuf, '\0');
412 1.19 christos buf_unputc(&outbuf);
413 1.19 christos tabulate(&outbuf);
414 1.19 christos buf_reset(&outbuf);
415 1.1 cgd }
416 1.1 cgd
417 1.1 cgd /*
418 1.1 cgd * Take the passed line buffer, insert leading tabs where possible, and
419 1.1 cgd * output on standard output (finally).
420 1.1 cgd */
421 1.10 jdolecek static void
422 1.19 christos tabulate(struct buffer *buf)
423 1.1 cgd {
424 1.6 lukem char *cp;
425 1.19 christos size_t b, t;
426 1.1 cgd
427 1.1 cgd /*
428 1.1 cgd * Toss trailing blanks in the output line.
429 1.1 cgd */
430 1.19 christos for (cp = buf->ptr; cp >= buf->bptr && *cp == ' '; cp--)
431 1.19 christos continue;
432 1.19 christos *cp = '\0';
433 1.1 cgd
434 1.1 cgd /*
435 1.1 cgd * Count the leading blank space and tabulate.
436 1.1 cgd */
437 1.19 christos for (cp = buf->bptr; *cp == ' '; cp++)
438 1.19 christos continue;
439 1.19 christos b = cp - buf->bptr;
440 1.20 christos t = b / 8;
441 1.20 christos b = b % 8;
442 1.1 cgd if (t > 0)
443 1.1 cgd do
444 1.21 christos putchar('\t');
445 1.1 cgd while (--t);
446 1.1 cgd if (b > 0)
447 1.1 cgd do
448 1.21 christos putchar(' ');
449 1.1 cgd while (--b);
450 1.1 cgd while (*cp)
451 1.21 christos putchar(*cp++);
452 1.21 christos putchar('\n');
453 1.1 cgd }
454 1.1 cgd
455 1.1 cgd /*
456 1.1 cgd * Initialize the output line with the appropriate number of
457 1.1 cgd * leading blanks.
458 1.1 cgd */
459 1.10 jdolecek static void
460 1.16 wiz leadin(void)
461 1.1 cgd {
462 1.19 christos size_t b;
463 1.19 christos
464 1.19 christos buf_reset(&outbuf);
465 1.1 cgd
466 1.19 christos for (b = 0; b < pfx; b++)
467 1.19 christos buf_putc(&outbuf, ' ');
468 1.1 cgd }
469 1.1 cgd
/*
 * Is s1 a prefix of s2?
 *
 * Returns non-zero when every character of s1 matches the character at
 * the same position in s2, zero otherwise.  An empty s1 is a prefix of
 * anything.  Both arguments must be NUL-terminated strings.
 *
 * The comparison stops at the end of s1 and never reads past the
 * terminator of either string.
 */
static int
ispref(const char *s1, const char *s2)
{

	return strncmp(s1, s2, strlen(s1)) == 0;
}
481