deroff.c revision 1.9 1 /* $NetBSD: deroff.c,v 1.9 2011/08/31 13:38:19 joerg Exp $ */
2
3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
4
5 /*-
6 * Copyright (c) 1988, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33 /*
34 * Copyright (C) Caldera International Inc. 2001-2002.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code and documentation must retain the above
41 * copyright notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed or owned by Caldera
48 * International, Inc.
49 * 4. Neither the name of Caldera International, Inc. nor the names of other
50 * contributors may be used to endorse or promote products derived from
51 * this software without specific prior written permission.
52 *
53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
/*
 * Identification strings embedded in the binary; compiled out when
 * running under lint.
 */
#ifndef lint
static const char copyright[] =
    "@(#) Copyright (c) 1988, 1993\n"
    "The Regents of the University of California. All rights reserved.\n";

# if 0
static const char sccsid[] = "@(#)deroff.c 8.1 (Berkeley) 6/6/93";
# else
static const char rcsid[] =
    "$NetBSD: deroff.c,v 1.9 2011/08/31 13:38:19 joerg Exp $";
# endif
#endif /* not lint */
80
81 #include <sys/cdefs.h>
82 #include <err.h>
83 #include <limits.h>
84 #include <stddef.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89
/*
 * Deroff command -- strip troff, eqn, and Tbl sequences from
 * a file.  Has two flag arguments: -w, to cause output of one word
 * per line rather than in the original format, and
 * -mm (or -ms), which causes the corresponding macros to be
 * interpreted so that just sentences are output.
 * -ml also gets rid of lists.
 * Deroff follows .so and .nx commands, removes contents of macro
 * definitions, equations (both .EQ ... .EN and $...$),
 * Tbl command sequences, and Troff backslash constructions.
 *
 * All input is through the Cget macro;
 * the most recently read character is in c.
 *
 * Modified by Robert Henry to process -me and -man macros.
 */
106
/*
 * Character input macros.
 *
 * Cget reads the next character from infile into the global c.  At end
 * of file it yields eof(); when the character equals ldelim and no
 * nested file is active (filesp == files) it yields skeqn() instead.
 * C1get is the same without the ldelim check.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() : c)

/* With DEBUG the readers are out-of-line functions; otherwise they expand inline. */
#ifdef DEBUG
# define C _C()
# define C1 _C1()
#else /* not DEBUG */
# define C Cget
# define C1 C1get
#endif /* not DEBUG */

/*
 * SKIP discards input up to and including the next newline.
 * SKIP_TO_COM discards two lines and then keeps reading until it has
 * seen a '.' that follows a newline and is itself followed by a
 * character not greater than 'Z'; that character is left in c.
 *
 * Both are wrapped in do { } while (0) so that each behaves as exactly
 * one statement: the bare forms would misbehave under an unbraced
 * if/else (only the first statement of SKIP_TO_COM would be guarded,
 * and a SKIP without its semicolon would swallow the next statement).
 */
#define SKIP do { while (C != '\n') continue; } while (0)
#define SKIP_TO_COM do { \
	SKIP; \
	SKIP; \
	pc = c; \
	while (C != '.' || pc != '\n' || C > 'Z') \
		pc = c; \
} while (0)
/* Truth values used for the option and state flags. */
#define NO	0
#define YES	1

/* Macro package selected with -m; stored in mac. */
enum {
	MS = 0,		/* -ms */
	MM = 1,		/* -mm */
	ME = 2,		/* -me */
	MA = 3		/* -man */
};

#ifdef DEBUG
/* Printable package names, indexed by the values above. */
static char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* Values passed as the `constant' argument of the line printers. */
#define ONE	1
#define TWO	2

/* Value of ldelim/rdelim when no inline equation delimiter is set. */
#define NOCHAR	(-2)

/*
 * Character classes stored in chars[].  The numeric order matters:
 * msputwords() treats everything below DIGIT as a non-word character.
 */
enum {
	SPECIAL = 0,
	APOS = 1,
	PUNCT = 2,
	DIGIT = 3,
	LETTER = 4
};

/* Number of slots in the files[] stack of open input files. */
#define MAXFILES	20
143
/* Option flags, set while parsing the command line in main(). */
static int iflag;	/* -i: so() and nx() do not open the named file */
static int wordflag;	/* -w: output one word per line */
static int msflag;	/* processing a source written using a mac package */
static int mac;		/* which package */
static int disp;	/* set by -ml */
static int parag;	/* -p */

/* Parser state. */
static int inmacro;	/* nonzero while inside a macro definition/arguments */
static int intable;	/* YES between table start and .TE */
static int keepblock;	/* keep blocks of text; normally false when msflag */
153
/*
 * Character class table: SPECIAL, PUNCT, APOS, DIGIT, or LETTER.
 * It is indexed by unsigned char values and by characters returned
 * from getc(), so it needs one entry for every possible byte value,
 * not just the 128 ASCII codes.  Entries that main() does not set
 * stay 0 (SPECIAL).
 */
static char chars[UCHAR_MAX + 1];
155
156 static char line[LINE_MAX];
157 static char *lp;
158
159 static int c;
160 static int pc;
161 static int ldelim;
162 static int rdelim;
163
164 static char fname[PATH_MAX];
165 static FILE *files[MAXFILES];
166 static FILE **filesp;
167 static FILE *infile;
168
169 static int argc;
170 static char **argv;
171
/*
 * Macro processing: types and constants for the macro tables.
 */
typedef int pacmac;		/* compressed macro name */
static int argconcat = 0;	/* concat arguments together (-me only) */

/*
 * tomac packs a two-character macro name into a pacmac (first
 * character in bits 8-15, second in bits 0-7); frommac unpacks it.
 */
#define tomac(c1, c2) \
	((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define frommac(src, c1, c2) \
	(((c1) = ((src) >> 8) & 0xFF), ((c2) = (src) & 0xFF))

/* One macro table entry: when it applies, its name, and its handler. */
struct mactab {
	int condition;		/* one of the matching conditions below */
	pacmac macname;		/* packed name; 0 terminates a table */
	int (*func)(pacmac);	/* handler, called with the packed name */
};

/* The tables themselves are defined at the end of the file. */
static const struct mactab troffmactab[];
static const struct mactab ppmactab[];
static const struct mactab msmactab[];
static const struct mactab mmmactab[];
static const struct mactab memactab[];
static const struct mactab manmactab[];

/*
 * Macro table initialization
 */
#define M(cond, c1, c2, func) { cond, tomac(c1, c2), func }

/*
 * Flags for matching conditions other than
 * the macro name
 */
enum {
	NONE = 0,
	FNEST = 1,	/* no nested files */
	NOMAC = 2,	/* no macro */
	MAC = 3,	/* macro */
	PARAG = 4,	/* in a paragraph */
	MSF = 5,	/* msflag is on */
	NBLK = 6	/* set if no blocks to be kept */
};

/*
 * Return codes from macro minions, determine where to jump,
 * how to repeat/reprocess text
 */
enum {
	COMX = 1,	/* goto comx */
	COM = 2		/* goto com */
};
219
220 static int skeqn(void);
221 static int eof(void);
222 #ifdef DEBUG
223 static int _C1(void);
224 static int _C(void);
225 #endif
226 static int EQ(pacmac);
227 static int domacro(pacmac);
228 static int PS(pacmac);
229 static int skip(pacmac);
230 static int intbl(pacmac);
231 static int outtbl(pacmac);
232 static int so(pacmac);
233 static int nx(pacmac);
234 static int skiptocom(pacmac);
235 static int PP(pacmac);
236 static int AU(pacmac);
237 static int SH(pacmac);
238 static int UX(pacmac);
239 static int MMHU(pacmac);
240 static int mesnblock(pacmac);
241 static int mssnblock(pacmac);
242 static int nf(pacmac);
243 static int ce(pacmac);
244 static int meip(pacmac);
245 static int mepp(pacmac);
246 static int mesh(pacmac);
247 static int mefont(pacmac);
248 static int manfont(pacmac);
249 static int manpp(pacmac);
250 static int macsort(const void *, const void *);
251 static int sizetab(const struct mactab *);
252 static void getfname(void);
253 static void textline(char *, int);
254 static void work(void) __dead;
255 static void regline(void (*)(char *, int), int);
256 static void macro(void);
257 static void tbl(void);
258 static void stbl(void);
259 static void eqn(void);
260 static void backsl(void);
261 static void sce(void);
262 static void refer(int);
263 static void inpic(void);
264 static void msputmac(char *, int);
265 static void msputwords(int);
266 static void meputmac(char *, int);
267 static void meputwords(int);
268 static void noblock(char, char);
269 static void defcomline(pacmac);
270 static void comline(void);
271 static void buildtab(const struct mactab **, int *);
272 static FILE *opn(char *);
273 static struct mactab *macfill(struct mactab *, const struct mactab *);
274 static void usage(void) __dead;
275
276 int
277 main(int ac, char **av)
278 {
279 int i, ch;
280 int errflg = 0;
281 int kflag = NO;
282
283 iflag = NO;
284 wordflag = NO;
285 msflag = NO;
286 mac = ME;
287 disp = NO;
288 parag = NO;
289 inmacro = NO;
290 intable = NO;
291 ldelim = NOCHAR;
292 rdelim = NOCHAR;
293 keepblock = YES;
294
295 while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
296 switch (ch) {
297 case 'i':
298 iflag = YES;
299 break;
300 case 'k':
301 kflag = YES;
302 break;
303 case 'm':
304 msflag = YES;
305 keepblock = NO;
306 switch (optarg[0]) {
307 case 'm':
308 mac = MM;
309 break;
310 case 's':
311 mac = MS;
312 break;
313 case 'e':
314 mac = ME;
315 break;
316 case 'a':
317 mac = MA;
318 break;
319 case 'l':
320 disp = YES;
321 break;
322 default:
323 errflg++;
324 break;
325 }
326 if (errflg == 0 && optarg[1] != '\0')
327 errflg++;
328 break;
329 case 'p':
330 parag = YES;
331 break;
332 case 'w':
333 wordflag = YES;
334 kflag = YES;
335 break;
336 default:
337 errflg++;
338 }
339 }
340 argc = ac - optind;
341 argv = av + optind;
342
343 if (kflag)
344 keepblock = YES;
345 if (errflg)
346 usage();
347
348 #ifdef DEBUG
349 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
350 msflag, mactab[mac], keepblock, disp);
351 #endif /* DEBUG */
352 if (argc == 0) {
353 infile = stdin;
354 } else {
355 infile = opn(argv[0]);
356 --argc;
357 ++argv;
358 }
359 files[0] = infile;
360 filesp = &files[0];
361
362 for (i = 'a'; i <= 'z' ; ++i)
363 chars[i] = LETTER;
364 for (i = 'A'; i <= 'Z'; ++i)
365 chars[i] = LETTER;
366 for (i = '0'; i <= '9'; ++i)
367 chars[i] = DIGIT;
368 chars['\''] = APOS;
369 chars['&'] = APOS;
370 chars['.'] = PUNCT;
371 chars[','] = PUNCT;
372 chars[';'] = PUNCT;
373 chars['?'] = PUNCT;
374 chars[':'] = PUNCT;
375 work();
376 return 0;
377 }
378
379 static int
380 skeqn(void)
381 {
382
383 while ((c = getc(infile)) != rdelim) {
384 if (c == EOF)
385 c = eof();
386 else if (c == '"') {
387 while ((c = getc(infile)) != '"') {
388 if (c == EOF ||
389 (c == '\\' && (c = getc(infile)) == EOF))
390 c = eof();
391 }
392 }
393 }
394 if (msflag)
395 return c == 'x';
396 return c == ' ';
397 }
398
/*
 * Open the file named p for reading.  On failure, print a diagnostic
 * via err() and exit with status 1; otherwise return the stream.
 */
static FILE *
opn(char *p)
{
	FILE *fp = fopen(p, "r");

	if (fp == NULL)
		err(1, "fopen %s", p);
	return fp;
}
409
410 static int
411 eof(void)
412 {
413
414 if (infile != stdin)
415 fclose(infile);
416 if (filesp > files)
417 infile = *--filesp;
418 else if (argc > 0) {
419 infile = opn(argv[0]);
420 --argc;
421 ++argv;
422 } else
423 exit(0);
424 return C;
425 }
426
427 static void
428 getfname(void)
429 {
430 char *p;
431 struct chain {
432 struct chain *nextp;
433 char *datap;
434 } *q;
435 static struct chain *namechain= NULL;
436
437 while (C == ' ')
438 ; /* nothing */
439
440 for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) &&
441 (*p = c) != '\n' &&
442 c != ' ' && c != '\t' && c != '\\'; ++p)
443 C;
444 *p = '\0';
445 while (c != '\n')
446 C;
447
448 /* see if this name has already been used */
449 for (q = namechain ; q; q = q->nextp)
450 if (strcmp(fname, q->datap) == 0) {
451 fname[0] = '\0';
452 return;
453 }
454
455 q = (struct chain *) malloc(sizeof(struct chain));
456 if (q == NULL)
457 err(1, NULL);
458 q->nextp = namechain;
459 q->datap = strdup(fname);
460 if (q->datap == NULL)
461 err(1, NULL);
462 namechain = q;
463 }
464
465 /*ARGSUSED*/
466 static void
467 textline(char *str, int constant)
468 {
469
470 if (wordflag) {
471 msputwords(0);
472 return;
473 }
474 puts(str);
475 }
476
477 static void
478 work(void)
479 {
480
481 for (;;) {
482 C;
483 #ifdef FULLDEBUG
484 printf("Starting work with `%c'\n", c);
485 #endif /* FULLDEBUG */
486 if (c == '.' || c == '\'')
487 comline();
488 else
489 regline(textline, TWO);
490 }
491 }
492
493 static void
494 regline(void (*pfunc)(char *, int), int constant)
495 {
496
497 line[0] = c;
498 lp = line;
499 while (lp - line < (ptrdiff_t)sizeof(line)) {
500 if (c == '\\') {
501 *lp = ' ';
502 backsl();
503 }
504 if (c == '\n')
505 break;
506 if (intable && c == 'T') {
507 *++lp = C;
508 if (c == '{' || c == '}') {
509 lp[-1] = ' ';
510 *lp = C;
511 }
512 } else {
513 *++lp = C;
514 }
515 }
516 *lp = '\0';
517
518 if (line[0] != '\0')
519 (*pfunc)(line, constant);
520 }
521
522 static void
523 macro(void)
524 {
525
526 if (msflag) {
527 do {
528 SKIP;
529 } while (C!='.' || C!='.' || C=='.'); /* look for .. */
530 if (c != '\n')
531 SKIP;
532 return;
533 }
534 SKIP;
535 inmacro = YES;
536 }
537
538 static void
539 tbl(void)
540 {
541
542 while (C != '.')
543 ; /* nothing */
544 SKIP;
545 intable = YES;
546 }
547
548 static void
549 stbl(void)
550 {
551
552 while (C != '.')
553 ; /* nothing */
554 SKIP_TO_COM;
555 if (c != 'T' || C != 'E') {
556 SKIP;
557 pc = c;
558 while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
559 pc = c;
560 }
561 }
562
563 static void
564 eqn(void)
565 {
566 int c1, c2;
567 int dflg;
568 char last;
569
570 last=0;
571 dflg = 1;
572 SKIP;
573
574 for (;;) {
575 if (C1 == '.' || c == '\'') {
576 while (C1 == ' ' || c == '\t')
577 ;
578 if (c == 'E' && C1 == 'N') {
579 SKIP;
580 if (msflag && dflg) {
581 putchar('x');
582 putchar(' ');
583 if (last) {
584 putchar(last);
585 putchar('\n');
586 }
587 }
588 return;
589 }
590 } else if (c == 'd') {
591 /* look for delim */
592 if (C1 == 'e' && C1 == 'l')
593 if (C1 == 'i' && C1 == 'm') {
594 while (C1 == ' ')
595 ; /* nothing */
596
597 if ((c1 = c) == '\n' ||
598 (c2 = C1) == '\n' ||
599 (c1 == 'o' && c2 == 'f' && C1=='f')) {
600 ldelim = NOCHAR;
601 rdelim = NOCHAR;
602 } else {
603 ldelim = c1;
604 rdelim = c2;
605 }
606 }
607 dflg = 0;
608 }
609
610 if (c != '\n')
611 while (C1 != '\n') {
612 if (chars[c] == PUNCT)
613 last = c;
614 else if (c != ' ')
615 last = 0;
616 }
617 }
618 }
619
620 /* skip over a complete backslash construction */
621 static void
622 backsl(void)
623 {
624 int bdelim;
625
626 sw:
627 switch (C) {
628 case '"':
629 SKIP;
630 return;
631
632 case 's':
633 if (C == '\\')
634 backsl();
635 else {
636 while (C >= '0' && c <= '9')
637 ; /* nothing */
638 ungetc(c, infile);
639 c = '0';
640 }
641 --lp;
642 return;
643
644 case 'f':
645 case 'n':
646 case '*':
647 if (C != '(')
648 return;
649
650 case '(':
651 if (msflag) {
652 if (C == 'e') {
653 if (C == 'm') {
654 *lp = '-';
655 return;
656 }
657 }
658 else if (c != '\n')
659 C;
660 return;
661 }
662 if (C != '\n')
663 C;
664 return;
665
666 case '$':
667 C; /* discard argument number */
668 return;
669
670 case 'b':
671 case 'x':
672 case 'v':
673 case 'h':
674 case 'w':
675 case 'o':
676 case 'l':
677 case 'L':
678 if ((bdelim = C) == '\n')
679 return;
680 while (C != '\n' && c != bdelim)
681 if (c == '\\')
682 backsl();
683 return;
684
685 case '\\':
686 if (inmacro)
687 goto sw;
688
689 default:
690 return;
691 }
692 }
693
694 static void
695 sce(void)
696 {
697 char *ap;
698 int n, i;
699 char a[10];
700
701 for (ap = a; C != '\n'; ap++) {
702 *ap = c;
703 if (ap == &a[9]) {
704 SKIP;
705 ap = a;
706 break;
707 }
708 }
709 if (ap != a)
710 n = atoi(a);
711 else
712 n = 1;
713 for (i = 0; i < n;) {
714 if (C == '.') {
715 if (C == 'c') {
716 if (C == 'e') {
717 while (C == ' ')
718 ; /* nothing */
719 if (c == '0') {
720 SKIP;
721 break;
722 } else
723 SKIP;
724 }
725 else
726 SKIP;
727 } else if (c == 'P' || C == 'P') {
728 if (c != '\n')
729 SKIP;
730 break;
731 } else if (c != '\n')
732 SKIP;
733 } else {
734 SKIP;
735 i++;
736 }
737 }
738 }
739
740 static void
741 refer(int c1)
742 {
743 int c2;
744
745 if (c1 != '\n')
746 SKIP;
747
748 for (c2 = -1;;) {
749 if (C != '.')
750 SKIP;
751 else {
752 if (C != ']')
753 SKIP;
754 else {
755 while (C != '\n')
756 c2 = c;
757 if (c2 != -1 && chars[c2] == PUNCT)
758 putchar(c2);
759 return;
760 }
761 }
762 }
763 }
764
765 static void
766 inpic(void)
767 {
768 int c1;
769 char *p1;
770
771 SKIP;
772 p1 = line;
773 c = '\n';
774 for (;;) {
775 c1 = c;
776 if (C == '.' && c1 == '\n') {
777 if (C != 'P') {
778 if (c == '\n')
779 continue;
780 else {
781 SKIP;
782 c = '\n';
783 continue;
784 }
785 }
786 if (C != 'E') {
787 if (c == '\n')
788 continue;
789 else {
790 SKIP;
791 c = '\n';
792 continue;
793 }
794 }
795 SKIP;
796 return;
797 }
798 else if (c == '\"') {
799 while (C != '\"') {
800 if (c == '\\') {
801 if (C == '\"')
802 continue;
803 ungetc(c, infile);
804 backsl();
805 } else
806 *p1++ = c;
807 }
808 *p1++ = ' ';
809 }
810 else if (c == '\n' && p1 != line) {
811 *p1 = '\0';
812 if (wordflag)
813 msputwords(NO);
814 else {
815 puts(line);
816 putchar('\n');
817 }
818 p1 = line;
819 }
820 }
821 }
822
#ifdef DEBUG
/*
 * Out-of-line versions of the character readers, used in place of the
 * inline macros when DEBUG is defined.
 */
static int
_C1(void)
{
	int ch = C1get;

	return ch;
}

static int
_C(void)
{
	int ch = Cget;

	return ch;
}
#endif /* DEBUG */
838
839 /*
840 * Put out a macro line, using ms and mm conventions.
841 */
842 static void
843 msputmac(char *s, int constant)
844 {
845 char *t;
846 int found;
847 int last;
848
849 last = 0;
850 found = 0;
851 if (wordflag) {
852 msputwords(YES);
853 return;
854 }
855 while (*s) {
856 while (*s == ' ' || *s == '\t')
857 putchar(*s++);
858 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
859 ; /* nothing */
860 if (*s == '\"')
861 s++;
862 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
863 chars[(unsigned char)s[1]] == LETTER) {
864 while (s < t)
865 if (*s == '\"')
866 s++;
867 else
868 putchar(*s++);
869 last = *(t-1);
870 found++;
871 } else if (found && chars[(unsigned char)s[0]] == PUNCT &&
872 s[1] == '\0') {
873 putchar(*s++);
874 } else {
875 last = *(t - 1);
876 s = t;
877 }
878 }
879 putchar('\n');
880 if (msflag && chars[last] == PUNCT) {
881 putchar(last);
882 putchar('\n');
883 }
884 }
885
886 /*
887 * put out words (for the -w option) with ms and mm conventions
888 */
889 static void
890 msputwords(int macline)
891 {
892 char *p, *p1;
893 int i, nlet;
894
895 for (p1 = line;;) {
896 /*
897 * skip initial specials ampersands and apostrophes
898 */
899 while (chars[(unsigned char)*p1] < DIGIT)
900 if (*p1++ == '\0')
901 return;
902 nlet = 0;
903 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
904 if (i == LETTER)
905 ++nlet;
906
907 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
908 /*
909 * delete trailing ampersands and apostrophes
910 */
911 while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
912 i == APOS )
913 --p;
914 while (p1 < p)
915 putchar(*p1++);
916 putchar('\n');
917 } else {
918 p1 = p;
919 }
920 }
921 }
922
923 /*
924 * put out a macro using the me conventions
925 */
/*
 * Advance the pointer cp over blanks and tabs (SKIPBLANK), or up to
 * the next blank, tab or end of string (SKIPNONBLANK).  cp must be a
 * modifiable pointer lvalue; it is evaluated more than once.
 */
#define SKIPBLANK(cp) while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
#define SKIPNONBLANK(cp) while (*(cp) != ' ' && *(cp) != '\t' && *(cp) != '\0') { (cp)++; }
928
929 static void
930 meputmac(char *cp, int constant)
931 {
932 char *np;
933 int found;
934 int argno;
935 int last;
936 int inquote;
937
938 last = 0;
939 found = 0;
940 if (wordflag) {
941 meputwords(YES);
942 return;
943 }
944 for (argno = 0; *cp; argno++) {
945 SKIPBLANK(cp);
946 inquote = (*cp == '"');
947 if (inquote)
948 cp++;
949 for (np = cp; *np; np++) {
950 switch (*np) {
951 case '\n':
952 case '\0':
953 break;
954
955 case '\t':
956 case ' ':
957 if (inquote)
958 continue;
959 else
960 goto endarg;
961
962 case '"':
963 if (inquote && np[1] == '"') {
964 memmove(np, np + 1, strlen(np));
965 np++;
966 continue;
967 } else {
968 *np = ' '; /* bye bye " */
969 goto endarg;
970 }
971
972 default:
973 continue;
974 }
975 }
976 endarg: ;
977 /*
978 * cp points at the first char in the arg
979 * np points one beyond the last char in the arg
980 */
981 if ((argconcat == 0) || (argconcat != argno))
982 putchar(' ');
983 #ifdef FULLDEBUG
984 {
985 char *p;
986 printf("[%d,%d: ", argno, np - cp);
987 for (p = cp; p < np; p++) {
988 putchar(*p);
989 }
990 printf("]");
991 }
992 #endif /* FULLDEBUG */
993 /*
994 * Determine if the argument merits being printed
995 *
996 * constant is the cut off point below which something
997 * is not a word.
998 */
999 if (((np - cp) > constant) &&
1000 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
1001 for (; cp < np; cp++)
1002 putchar(*cp);
1003 last = np[-1];
1004 found++;
1005 } else if (found && (np - cp == 1) &&
1006 chars[(unsigned char)*cp] == PUNCT) {
1007 putchar(*cp);
1008 } else {
1009 last = np[-1];
1010 }
1011 cp = np;
1012 }
1013 if (msflag && chars[last] == PUNCT)
1014 putchar(last);
1015 putchar('\n');
1016 }
1017
/*
 * Put out words (for the -w option) for the me package; this simply
 * defers to the ms/mm word printer.
 */
static void
meputwords(int macline)
{
	int arg = macline;

	msputwords(arg);
}
1027
1028 /*
1029 *
1030 * Skip over a nested set of macros
1031 *
1032 * Possible arguments to noblock are:
1033 *
1034 * fi end of unfilled text
1035 * PE pic ending
1036 * DE display ending
1037 *
1038 * for ms and mm only:
1039 * KE keep ending
1040 *
1041 * NE undocumented match to NS (for mm?)
1042 * LE mm only: matches RL or *L (for lists)
1043 *
1044 * for me:
1045 * ([lqbzcdf]
1046 */
1047 static void
1048 noblock(char a1, char a2)
1049 {
1050 int c1,c2;
1051 int eqnf;
1052 int lct;
1053
1054 lct = 0;
1055 eqnf = 1;
1056 SKIP;
1057 for (;;) {
1058 while (C != '.')
1059 if (c == '\n')
1060 continue;
1061 else
1062 SKIP;
1063 if ((c1 = C) == '\n')
1064 continue;
1065 if ((c2 = C) == '\n')
1066 continue;
1067 if (c1 == a1 && c2 == a2) {
1068 SKIP;
1069 if (lct != 0) {
1070 lct--;
1071 continue;
1072 }
1073 if (eqnf)
1074 putchar('.');
1075 putchar('\n');
1076 return;
1077 } else if (a1 == 'L' && c2 == 'L') {
1078 lct++;
1079 SKIP;
1080 }
1081 /*
1082 * equations (EQ) nested within a display
1083 */
1084 else if (c1 == 'E' && c2 == 'Q') {
1085 if ((mac == ME && a1 == ')')
1086 || (mac != ME && a1 == 'D')) {
1087 eqn();
1088 eqnf=0;
1089 }
1090 }
1091 /*
1092 * turning on filling is done by the paragraphing
1093 * macros
1094 */
1095 else if (a1 == 'f') { /* .fi */
1096 if ((mac == ME && (c2 == 'h' || c2 == 'p'))
1097 || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1098 SKIP;
1099 return;
1100 }
1101 } else {
1102 SKIP;
1103 }
1104 }
1105 }
1106
1107 static int
1108 /*ARGSUSED*/
1109 EQ(pacmac unused)
1110 {
1111
1112 eqn();
1113 return 0;
1114 }
1115
1116 static int
1117 /*ARGSUSED*/
1118 domacro(pacmac unused)
1119 {
1120
1121 macro();
1122 return 0;
1123 }
1124
1125 static int
1126 /*ARGSUSED*/
1127 PS(pacmac unused)
1128 {
1129
1130 for (C; c == ' ' || c == '\t'; C)
1131 ; /* nothing */
1132
1133 if (c == '<') { /* ".PS < file" -- don't expect a .PE */
1134 SKIP;
1135 return 0;
1136 }
1137 if (!msflag)
1138 inpic();
1139 else
1140 noblock('P', 'E');
1141 return 0;
1142 }
1143
1144 static int
1145 /*ARGSUSED*/
1146 skip(pacmac unused)
1147 {
1148
1149 SKIP;
1150 return 0;
1151 }
1152
1153 static int
1154 /*ARGSUSED*/
1155 intbl(pacmac unused)
1156 {
1157
1158 if (msflag)
1159 stbl();
1160 else
1161 tbl();
1162 return 0;
1163 }
1164
1165 static int
1166 /*ARGSUSED*/
1167 outtbl(pacmac unused)
1168 {
1169
1170 intable = NO;
1171 return 0;
1172 }
1173
1174 static int
1175 /*ARGSUSED*/
1176 so(pacmac unused)
1177 {
1178
1179 if (!iflag) {
1180 getfname();
1181 if (fname[0]) {
1182 if (++filesp - &files[0] > MAXFILES)
1183 err(1, "too many nested files (max %d)",
1184 MAXFILES);
1185 infile = *filesp = opn(fname);
1186 }
1187 }
1188 return 0;
1189 }
1190
1191 static int
1192 /*ARGSUSED*/
1193 nx(pacmac unused)
1194 {
1195
1196 if (!iflag) {
1197 getfname();
1198 if (fname[0] == '\0')
1199 exit(0);
1200 if (infile != stdin)
1201 fclose(infile);
1202 infile = *filesp = opn(fname);
1203 }
1204 return 0;
1205 }
1206
1207 static int
1208 /*ARGSUSED*/
1209 skiptocom(pacmac unused)
1210 {
1211
1212 SKIP_TO_COM;
1213 return COMX;
1214 }
1215
1216 static int
1217 PP(pacmac c12)
1218 {
1219 int c1, c2;
1220
1221 frommac(c12, c1, c2);
1222 printf(".%c%c", c1, c2);
1223 while (C != '\n')
1224 putchar(c);
1225 putchar('\n');
1226 return 0;
1227 }
1228
1229 static int
1230 /*ARGSUSED*/
1231 AU(pacmac unused)
1232 {
1233
1234 if (mac == MM)
1235 return 0;
1236 SKIP_TO_COM;
1237 return COMX;
1238 }
1239
1240 static int
1241 SH(pacmac c12)
1242 {
1243 int c1, c2;
1244
1245 frommac(c12, c1, c2);
1246
1247 if (parag) {
1248 printf(".%c%c", c1, c2);
1249 while (C != '\n')
1250 putchar(c);
1251 putchar(c);
1252 putchar('!');
1253 for (;;) {
1254 while (C != '\n')
1255 putchar(c);
1256 putchar('\n');
1257 if (C == '.')
1258 return COM;
1259 putchar('!');
1260 putchar(c);
1261 }
1262 /*NOTREACHED*/
1263 } else {
1264 SKIP_TO_COM;
1265 return COMX;
1266 }
1267 }
1268
1269 static int
1270 /*ARGSUSED*/
1271 UX(pacmac unused)
1272 {
1273
1274 if (wordflag)
1275 printf("UNIX\n");
1276 else
1277 printf("UNIX ");
1278 return 0;
1279 }
1280
1281 static int
1282 MMHU(pacmac c12)
1283 {
1284 int c1, c2;
1285
1286 frommac(c12, c1, c2);
1287 if (parag) {
1288 printf(".%c%c", c1, c2);
1289 while (C != '\n')
1290 putchar(c);
1291 putchar('\n');
1292 } else {
1293 SKIP;
1294 }
1295 return 0;
1296 }
1297
1298 static int
1299 mesnblock(pacmac c12)
1300 {
1301 int c1, c2;
1302
1303 frommac(c12, c1, c2);
1304 noblock(')', c2);
1305 return 0;
1306 }
1307
1308 static int
1309 mssnblock(pacmac c12)
1310 {
1311 int c1, c2;
1312
1313 frommac(c12, c1, c2);
1314 noblock(c1, 'E');
1315 return 0;
1316 }
1317
1318 static int
1319 /*ARGUSED*/
1320 nf(pacmac unused)
1321 {
1322
1323 noblock('f', 'i');
1324 return 0;
1325 }
1326
1327 static int
1328 /*ARGUSED*/
1329 ce(pacmac unused)
1330 {
1331
1332 sce();
1333 return 0;
1334 }
1335
1336 static int
1337 meip(pacmac c12)
1338 {
1339
1340 if (parag)
1341 mepp(c12);
1342 else if (wordflag) /* save the tag */
1343 regline(meputmac, ONE);
1344 else
1345 SKIP;
1346 return 0;
1347 }
1348
1349 /*
1350 * only called for -me .pp or .sh, when parag is on
1351 */
1352 static int
1353 mepp(pacmac c12)
1354 {
1355
1356 PP(c12); /* eats the line */
1357 return 0;
1358 }
1359
1360 /*
1361 * Start of a section heading; output the section name if doing words
1362 */
1363 static int
1364 mesh(pacmac c12)
1365 {
1366
1367 if (parag)
1368 mepp(c12);
1369 else if (wordflag)
1370 defcomline(c12);
1371 else
1372 SKIP;
1373 return 0;
1374 }
1375
1376 /*
1377 * process a font setting
1378 */
1379 static int
1380 mefont(pacmac c12)
1381 {
1382
1383 argconcat = 1;
1384 defcomline(c12);
1385 argconcat = 0;
1386 return 0;
1387 }
1388
1389 static int
1390 manfont(pacmac c12)
1391 {
1392
1393 return mefont(c12);
1394 }
1395
1396 static int
1397 manpp(pacmac c12)
1398 {
1399
1400 return mepp(c12);
1401 }
1402
1403 static void
1404 defcomline(pacmac c12)
1405 {
1406 int c1, c2;
1407
1408 frommac(c12, c1, c2);
1409 if (msflag && mac == MM && c2 == 'L') {
1410 if (disp || c1 == 'R') {
1411 noblock('L', 'E');
1412 } else {
1413 SKIP;
1414 putchar('.');
1415 }
1416 }
1417 else if (c1 == '.' && c2 == '.') {
1418 if (msflag) {
1419 SKIP;
1420 return;
1421 }
1422 while (C == '.')
1423 /*VOID*/;
1424 }
1425 ++inmacro;
1426 /*
1427 * Process the arguments to the macro
1428 */
1429 switch (mac) {
1430 default:
1431 case MM:
1432 case MS:
1433 if (c1 <= 'Z' && msflag)
1434 regline(msputmac, ONE);
1435 else
1436 regline(msputmac, TWO);
1437 break;
1438 case ME:
1439 regline(meputmac, ONE);
1440 break;
1441 }
1442 --inmacro;
1443 }
1444
1445 static void
1446 comline(void)
1447 {
1448 int c1;
1449 int c2;
1450 pacmac c12;
1451 int mid;
1452 int lb, ub;
1453 int hit;
1454 static int tabsize = 0;
1455 static const struct mactab *mactab = NULL;
1456 const struct mactab *mp;
1457
1458 if (mactab == 0)
1459 buildtab(&mactab, &tabsize);
1460 com:
1461 while (C == ' ' || c == '\t')
1462 ;
1463 comx:
1464 if ((c1 = c) == '\n')
1465 return;
1466 c2 = C;
1467 if (c1 == '.' && c2 != '.')
1468 inmacro = NO;
1469 if (msflag && c1 == '[') {
1470 refer(c2);
1471 return;
1472 }
1473 if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1474 printf(".P\n");
1475 return;
1476 }
1477 if (c2 == '\n')
1478 return;
1479 /*
1480 * Single letter macro
1481 */
1482 if (mac == ME && (c2 == ' ' || c2 == '\t') )
1483 c2 = ' ';
1484 c12 = tomac(c1, c2);
1485 /*
1486 * binary search through the table of macros
1487 */
1488 lb = 0;
1489 ub = tabsize - 1;
1490 while (lb <= ub) {
1491 mid = (ub + lb) / 2;
1492 mp = &mactab[mid];
1493 if (mp->macname < c12)
1494 lb = mid + 1;
1495 else if (mp->macname > c12)
1496 ub = mid - 1;
1497 else {
1498 hit = 1;
1499 #ifdef FULLDEBUG
1500 printf("preliminary hit macro %c%c ", c1, c2);
1501 #endif /* FULLDEBUG */
1502 switch (mp->condition) {
1503 case NONE:
1504 hit = YES;
1505 break;
1506 case FNEST:
1507 hit = (filesp == files);
1508 break;
1509 case NOMAC:
1510 hit = !inmacro;
1511 break;
1512 case MAC:
1513 hit = inmacro;
1514 break;
1515 case PARAG:
1516 hit = parag;
1517 break;
1518 case NBLK:
1519 hit = !keepblock;
1520 break;
1521 default:
1522 hit = 0;
1523 }
1524
1525 if (hit) {
1526 #ifdef FULLDEBUG
1527 printf("MATCH\n");
1528 #endif /* FULLDEBUG */
1529 switch ((*(mp->func))(c12)) {
1530 default:
1531 return;
1532 case COMX:
1533 goto comx;
1534 case COM:
1535 goto com;
1536 }
1537 }
1538 #ifdef FULLDEBUG
1539 printf("FAIL\n");
1540 #endif /* FULLDEBUG */
1541 break;
1542 }
1543 }
1544 defcomline(c12);
1545 }
1546
1547 static int
1548 macsort(const void *p1, const void *p2)
1549 {
1550 const struct mactab *t1 = p1;
1551 const struct mactab *t2 = p2;
1552
1553 return t1->macname - t2->macname;
1554 }
1555
1556 static int
1557 sizetab(const struct mactab *mp)
1558 {
1559 int i;
1560
1561 i = 0;
1562 if (mp) {
1563 for (; mp->macname; mp++, i++)
1564 /*VOID*/ ;
1565 }
1566 return i;
1567 }
1568
1569 static struct mactab *
1570 macfill(struct mactab *dst, const struct mactab *src)
1571 {
1572
1573 if (src) {
1574 while (src->macname)
1575 *dst++ = *src++;
1576 }
1577 return dst;
1578 }
1579
/* Print the usage message on standard error and exit with status 1. */
static void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n",
	    __progname);
	exit(1);
}
1588
1589 static void
1590 buildtab(const struct mactab **r_back, int *r_size)
1591 {
1592 size_t size;
1593 const struct mactab *p1, *p2;
1594 struct mactab *back, *p;
1595
1596 size = sizetab(troffmactab) + sizetab(ppmactab);
1597 p1 = p2 = NULL;
1598 if (msflag) {
1599 switch (mac) {
1600 case ME:
1601 p1 = memactab;
1602 break;
1603 case MM:
1604 p1 = msmactab;
1605 p2 = mmmactab;
1606 break;
1607 case MS:
1608 p1 = msmactab;
1609 break;
1610 case MA:
1611 p1 = manmactab;
1612 break;
1613 default:
1614 break;
1615 }
1616 }
1617 size += sizetab(p1);
1618 size += sizetab(p2);
1619 back = calloc(size + 2, sizeof(struct mactab));
1620 if (back == NULL)
1621 err(1, NULL);
1622
1623 p = macfill(back, troffmactab);
1624 p = macfill(p, ppmactab);
1625 p = macfill(p, p1);
1626 p = macfill(p, p2);
1627
1628 qsort(back, size, sizeof(struct mactab), macsort);
1629 *r_size = size;
1630 *r_back = back;
1631 }
1632
1633 /*
1634 * troff commands
1635 */
1636 static const struct mactab troffmactab[] = {
1637 M(NONE, '\\','"', skip), /* comment */
1638 M(NOMAC, 'd','e', domacro), /* define */
1639 M(NOMAC, 'i','g', domacro), /* ignore till .. */
1640 M(NOMAC, 'a','m', domacro), /* append macro */
1641 M(NBLK, 'n','f', nf), /* filled */
1642 M(NBLK, 'c','e', ce), /* centered */
1643
1644 M(NONE, 's','o', so), /* source a file */
1645 M(NONE, 'n','x', nx), /* go to next file */
1646
1647 M(NONE, 't','m', skip), /* print string on tty */
1648 M(NONE, 'h','w', skip), /* exception hyphen words */
1649 M(NONE, 0,0, 0)
1650 };
1651
1652 /*
1653 * Preprocessor output
1654 */
1655 static const struct mactab ppmactab[] = {
1656 M(FNEST, 'E','Q', EQ), /* equation starting */
1657 M(FNEST, 'T','S', intbl), /* table starting */
1658 M(FNEST, 'T','C', intbl), /* alternative table? */
1659 M(FNEST, 'T','&', intbl), /* table reformatting */
1660 M(NONE, 'T','E', outtbl),/* table ending */
1661 M(NONE, 'P','S', PS), /* picture starting */
1662 M(NONE, 0,0, 0)
1663 };
1664
1665 /*
1666 * Particular to ms and mm
1667 */
1668 static const struct mactab msmactab[] = {
1669 M(NONE, 'T','L', skiptocom), /* title follows */
1670 M(NONE, 'F','S', skiptocom), /* start footnote */
1671 M(NONE, 'O','K', skiptocom), /* Other kws */
1672
1673 M(NONE, 'N','R', skip), /* undocumented */
1674 M(NONE, 'N','D', skip), /* use supplied date */
1675
1676 M(PARAG, 'P','P', PP), /* begin parag */
1677 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */
1678 M(PARAG, 'L','P', PP), /* left blocked parag */
1679
1680 M(NONE, 'A','U', AU), /* author */
1681 M(NONE, 'A','I', AU), /* authors institution */
1682
1683 M(NONE, 'S','H', SH), /* section heading */
1684 M(NONE, 'S','N', SH), /* undocumented */
1685 M(NONE, 'U','X', UX), /* unix */
1686
1687 M(NBLK, 'D','S', mssnblock), /* start display text */
1688 M(NBLK, 'K','S', mssnblock), /* start keep */
1689 M(NBLK, 'K','F', mssnblock), /* start float keep */
1690 M(NONE, 0,0, 0)
1691 };
1692
1693 static const struct mactab mmmactab[] = {
1694 M(NONE, 'H',' ', MMHU), /* -mm ? */
1695 M(NONE, 'H','U', MMHU), /* -mm ? */
1696 M(PARAG, 'P',' ', PP), /* paragraph for -mm */
1697 M(NBLK, 'N','S', mssnblock), /* undocumented */
1698 M(NONE, 0,0, 0)
1699 };
1700
1701 static const struct mactab memactab[] = {
1702 M(PARAG, 'p','p', mepp),
1703 M(PARAG, 'l','p', mepp),
1704 M(PARAG, 'n','p', mepp),
1705 M(NONE, 'i','p', meip),
1706
1707 M(NONE, 's','h', mesh),
1708 M(NONE, 'u','h', mesh),
1709
1710 M(NBLK, '(','l', mesnblock),
1711 M(NBLK, '(','q', mesnblock),
1712 M(NBLK, '(','b', mesnblock),
1713 M(NBLK, '(','z', mesnblock),
1714 M(NBLK, '(','c', mesnblock),
1715
1716 M(NBLK, '(','d', mesnblock),
1717 M(NBLK, '(','f', mesnblock),
1718 M(NBLK, '(','x', mesnblock),
1719
1720 M(NONE, 'r',' ', mefont),
1721 M(NONE, 'i',' ', mefont),
1722 M(NONE, 'b',' ', mefont),
1723 M(NONE, 'u',' ', mefont),
1724 M(NONE, 'q',' ', mefont),
1725 M(NONE, 'r','b', mefont),
1726 M(NONE, 'b','i', mefont),
1727 M(NONE, 'b','x', mefont),
1728 M(NONE, 0,0, 0)
1729 };
1730
1731 static const struct mactab manmactab[] = {
1732 M(PARAG, 'B','I', manfont),
1733 M(PARAG, 'B','R', manfont),
1734 M(PARAG, 'I','B', manfont),
1735 M(PARAG, 'I','R', manfont),
1736 M(PARAG, 'R','B', manfont),
1737 M(PARAG, 'R','I', manfont),
1738
1739 M(PARAG, 'P','P', manpp),
1740 M(PARAG, 'L','P', manpp),
1741 M(PARAG, 'H','P', manpp),
1742 M(NONE, 0,0, 0)
1743 };
1744