deroff.c revision 1.4 1 /* $NetBSD: deroff.c,v 1.4 2007/12/15 16:32:06 perry Exp $ */
2
3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
4
5 /*-
6 * Copyright (c) 1988, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33 /*
34 * Copyright (C) Caldera International Inc. 2001-2002.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code and documentation must retain the above
41 * copyright notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed or owned by Caldera
48 * International, Inc.
49 * 4. Neither the name of Caldera International, Inc. nor the names of other
50 * contributors may be used to endorse or promote products derived from
51 * this software without specific prior written permission.
52 *
53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
#ifndef lint
/*
 * Identification strings compiled into the binary: the Berkeley copyright
 * notice and the revision id of this file.  The SCCS id is kept for
 * reference only and is compiled out.
 */
static const char copyright[] =
"@(#) Copyright (c) 1988, 1993\n\
The Regents of the University of California. All rights reserved.\n";
# if 0
static const char sccsid[] = "@(#)deroff.c 8.1 (Berkeley) 6/6/93";
# else
static const char rcsid[] = "$NetBSD: deroff.c,v 1.4 2007/12/15 16:32:06 perry Exp $";
# endif
#endif /* not lint */
80
81 #include <sys/cdefs.h>
82 #include <err.h>
83 #include <limits.h>
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <unistd.h>
88
/*
 * Deroff command -- strip troff, eqn, and Tbl sequences from
 * a file.  Takes a flag argument, -w, to cause output one word per line
 * rather than in the original format.
 * -mm (or -ms) causes the corresponding macros to be interpreted
 * so that just sentences are output.
 * -ml also gets rid of lists.
 * Deroff follows .so and .nx commands, removes contents of macro
 * definitions, equations (both .EQ ... .EN and $...$),
 * Tbl command sequences, and Troff backslash constructions.
 *
 * All input is through the Cget macro;
 * the most recently read character is in c.
 *
 * Modified by Robert Henry to process -me and -man macros.
 */
105
/*
 * Character input.
 *
 * Cget reads the next character of `infile` into the global `c` and
 * yields it.  At end of file it yields eof() instead; when the character
 * equals the current left eqn delimiter and no .so file is nested
 * (filesp == files) it yields skeqn() instead, which swallows the in-line
 * equation.  C1get is the same without the delimiter check.
 */
#define Cget \
	((c = getc(infile)) == EOF ? eof() : \
	    ((c == ldelim) && (filesp == files) ? skeqn() : c))
#define C1get	((c = getc(infile)) == EOF ? eof() : c)

/*
 * C and C1 are the names used by the rest of the file.  With DEBUG they
 * go through real functions so a debugger can break on them.
 */
#ifdef DEBUG
# define C	_C()
# define C1	_C1()
#else /* not DEBUG */
# define C	Cget
# define C1	C1get
#endif /* not DEBUG */

/*
 * SKIP is a loop head that consumes input through the next newline; it
 * must be followed by `;` (an empty body).
 */
#define SKIP	while (C != '\n')

/*
 * SKIP_TO_COM discards the rest of the current line and the whole next
 * line, then keeps reading until it has seen a '.' that directly follows
 * a newline and is followed by a character not greater than 'Z'.  On exit
 * `c` holds that character.  Wrapped in do/while(0) so that the macro is
 * a single statement wherever `SKIP_TO_COM;` is written.
 */
#define SKIP_TO_COM \
	do { \
		SKIP; \
		SKIP; \
		pc = c; \
		while (C != '.' || pc != '\n' || C > 'Z') \
			pc = c; \
	} while (0)
/* Truth values stored in the flag variables. */
enum { NO = 0, YES = 1 };

/* Macro package selected with -m; stored in `mac`. */
enum {
	MS = 0,		/* -ms */
	MM = 1,		/* -mm */
	ME = 2,		/* -me */
	MA = 3		/* -man */
};

#ifdef DEBUG
char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* Length cut-offs passed as the `constant` argument of the line printers. */
enum { ONE = 1, TWO = 2 };

/* Value of ldelim/rdelim when no eqn delimiter is in effect. */
enum { NOCHAR = -2 };

/* Character classes stored in chars[]. */
enum {
	SPECIAL = 0,
	APOS = 1,
	PUNCT = 2,
	DIGIT = 3,
	LETTER = 4
};

/* Number of slots in the files[] stack of input streams. */
enum { MAXFILES = 20 };
142
143 static int iflag;
144 static int wordflag;
145 static int msflag; /* processing a source written using a mac package */
146 static int mac; /* which package */
147 static int disp;
148 static int parag;
149 static int inmacro;
150 static int intable;
151 static int keepblock; /* keep blocks of text; normally false when msflag */
152
153 static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
154
155 static char line[LINE_MAX];
156 static char *lp;
157
158 static int c;
159 static int pc;
160 static int ldelim;
161 static int rdelim;
162
163 static char fname[PATH_MAX];
164 static FILE *files[MAXFILES];
165 static FILE **filesp;
166 static FILE *infile;
167
168 static int argc;
169 static char **argv;
170
/*
 * Macro processing: types, helper macros and flag values shared by the
 * macro tables and by comline().
 */
typedef int pacmac;		/* compressed macro name */
static int argconcat = 0;	/* concat arguments together (-me only) */

/* Pack two name characters into a pacmac, and unpack them again. */
#define tomac(c1, c2)		((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define frommac(src, c1, c2)	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))

/* One macro table entry. */
struct mactab {
	int	condition;		/* extra condition that must hold */
	pacmac	macname;		/* packed two-character name */
	int	(*func)(pacmac);	/* handler run on a match */
};

/* The tables themselves are defined at the end of the file. */
static const struct mactab troffmactab[];
static const struct mactab ppmactab[];
static const struct mactab msmactab[];
static const struct mactab mmmactab[];
static const struct mactab memactab[];
static const struct mactab manmactab[];

/*
 * Macro table initialization
 */
#define M(cond, c1, c2, func)	{cond, tomac(c1, c2), func}

/*
 * Flags for matching conditions other than
 * the macro name
 */
enum {
	NONE = 0,
	FNEST = 1,	/* no nested files */
	NOMAC = 2,	/* no macro */
	MAC = 3,	/* macro */
	PARAG = 4,	/* in a paragraph */
	MSF = 5,	/* msflag is on */
	NBLK = 6	/* set if no blocks to be kept */
};

/*
 * Return codes from macro minions, determine where to jump,
 * how to repeat/reprocess text
 */
enum {
	COMX = 1,	/* goto comx */
	COM = 2		/* goto com */
};
218
219 static int skeqn(void);
220 static int eof(void);
221 #ifdef DEBUG
222 static int _C1(void);
223 static int _C(void);
224 #endif
225 static int EQ(pacmac);
226 static int domacro(pacmac);
227 static int PS(pacmac);
228 static int skip(pacmac);
229 static int intbl(pacmac);
230 static int outtbl(pacmac);
231 static int so(pacmac);
232 static int nx(pacmac);
233 static int skiptocom(pacmac);
234 static int PP(pacmac);
235 static int AU(pacmac);
236 static int SH(pacmac);
237 static int UX(pacmac);
238 static int MMHU(pacmac);
239 static int mesnblock(pacmac);
240 static int mssnblock(pacmac);
241 static int nf(pacmac);
242 static int ce(pacmac);
243 static int meip(pacmac);
244 static int mepp(pacmac);
245 static int mesh(pacmac);
246 static int mefont(pacmac);
247 static int manfont(pacmac);
248 static int manpp(pacmac);
249 static int macsort(const void *, const void *);
250 static int sizetab(const struct mactab *);
251 static void getfname(void);
252 static void textline(char *, int);
253 static void work(void);
254 static void regline(void (*)(char *, int), int);
255 static void macro(void);
256 static void tbl(void);
257 static void stbl(void);
258 static void eqn(void);
259 static void backsl(void);
260 static void sce(void);
261 static void refer(int);
262 static void inpic(void);
263 static void msputmac(char *, int);
264 static void msputwords(int);
265 static void meputmac(char *, int);
266 static void meputwords(int);
267 static void noblock(char, char);
268 static void defcomline(pacmac);
269 static void comline(void);
270 static void buildtab(const struct mactab **, int *);
271 static FILE *opn(char *);
272 static struct mactab *macfill(struct mactab *, const struct mactab *);
273 static void usage(void) __attribute__((__noreturn__));
274
275 int
276 main(int ac, char **av)
277 {
278 int i, ch;
279 int errflg = 0;
280 int kflag = NO;
281
282 iflag = NO;
283 wordflag = NO;
284 msflag = NO;
285 mac = ME;
286 disp = NO;
287 parag = NO;
288 inmacro = NO;
289 intable = NO;
290 ldelim = NOCHAR;
291 rdelim = NOCHAR;
292 keepblock = YES;
293
294 while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
295 switch (ch) {
296 case 'i':
297 iflag = YES;
298 break;
299 case 'k':
300 kflag = YES;
301 break;
302 case 'm':
303 msflag = YES;
304 keepblock = NO;
305 switch (optarg[0]) {
306 case 'm':
307 mac = MM;
308 break;
309 case 's':
310 mac = MS;
311 break;
312 case 'e':
313 mac = ME;
314 break;
315 case 'a':
316 mac = MA;
317 break;
318 case 'l':
319 disp = YES;
320 break;
321 default:
322 errflg++;
323 break;
324 }
325 if (errflg == 0 && optarg[1] != '\0')
326 errflg++;
327 break;
328 case 'p':
329 parag = YES;
330 break;
331 case 'w':
332 wordflag = YES;
333 kflag = YES;
334 break;
335 default:
336 errflg++;
337 }
338 }
339 argc = ac - optind;
340 argv = av + optind;
341
342 if (kflag)
343 keepblock = YES;
344 if (errflg)
345 usage();
346
347 #ifdef DEBUG
348 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
349 msflag, mactab[mac], keepblock, disp);
350 #endif /* DEBUG */
351 if (argc == 0) {
352 infile = stdin;
353 } else {
354 infile = opn(argv[0]);
355 --argc;
356 ++argv;
357 }
358 files[0] = infile;
359 filesp = &files[0];
360
361 for (i = 'a'; i <= 'z' ; ++i)
362 chars[i] = LETTER;
363 for (i = 'A'; i <= 'Z'; ++i)
364 chars[i] = LETTER;
365 for (i = '0'; i <= '9'; ++i)
366 chars[i] = DIGIT;
367 chars['\''] = APOS;
368 chars['&'] = APOS;
369 chars['.'] = PUNCT;
370 chars[','] = PUNCT;
371 chars[';'] = PUNCT;
372 chars['?'] = PUNCT;
373 chars[':'] = PUNCT;
374 work();
375 return 0;
376 }
377
378 static int
379 skeqn(void)
380 {
381
382 while ((c = getc(infile)) != rdelim) {
383 if (c == EOF)
384 c = eof();
385 else if (c == '"') {
386 while ((c = getc(infile)) != '"') {
387 if (c == EOF ||
388 (c == '\\' && (c = getc(infile)) == EOF))
389 c = eof();
390 }
391 }
392 }
393 if (msflag)
394 return c == 'x';
395 return c == ' ';
396 }
397
/*
 * Open `p` for reading and return the stream.  Does not return on
 * failure: err() reports the error and exits with status 1.
 */
static FILE *
opn(char *p)
{
	FILE *fp = fopen(p, "r");

	if (fp == NULL)
		err(1, "fopen %s", p);
	return fp;
}
408
409 static int
410 eof(void)
411 {
412
413 if (infile != stdin)
414 fclose(infile);
415 if (filesp > files)
416 infile = *--filesp;
417 else if (argc > 0) {
418 infile = opn(argv[0]);
419 --argc;
420 ++argv;
421 } else
422 exit(0);
423 return C;
424 }
425
426 static void
427 getfname(void)
428 {
429 char *p;
430 struct chain {
431 struct chain *nextp;
432 char *datap;
433 } *q;
434 static struct chain *namechain= NULL;
435
436 while (C == ' ')
437 ; /* nothing */
438
439 for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' &&
440 c != ' ' && c != '\t' && c != '\\'; ++p)
441 C;
442 *p = '\0';
443 while (c != '\n')
444 C;
445
446 /* see if this name has already been used */
447 for (q = namechain ; q; q = q->nextp)
448 if (strcmp(fname, q->datap) == 0) {
449 fname[0] = '\0';
450 return;
451 }
452
453 q = (struct chain *) malloc(sizeof(struct chain));
454 if (q == NULL)
455 err(1, NULL);
456 q->nextp = namechain;
457 q->datap = strdup(fname);
458 if (q->datap == NULL)
459 err(1, NULL);
460 namechain = q;
461 }
462
463 /*ARGSUSED*/
464 static void
465 textline(char *str, int constant)
466 {
467
468 if (wordflag) {
469 msputwords(0);
470 return;
471 }
472 puts(str);
473 }
474
475 void
476 work(void)
477 {
478
479 for (;;) {
480 C;
481 #ifdef FULLDEBUG
482 printf("Starting work with `%c'\n", c);
483 #endif /* FULLDEBUG */
484 if (c == '.' || c == '\'')
485 comline();
486 else
487 regline(textline, TWO);
488 }
489 }
490
491 static void
492 regline(void (*pfunc)(char *, int), int constant)
493 {
494
495 line[0] = c;
496 lp = line;
497 while (lp - line < sizeof(line)) {
498 if (c == '\\') {
499 *lp = ' ';
500 backsl();
501 }
502 if (c == '\n')
503 break;
504 if (intable && c == 'T') {
505 *++lp = C;
506 if (c == '{' || c == '}') {
507 lp[-1] = ' ';
508 *lp = C;
509 }
510 } else {
511 *++lp = C;
512 }
513 }
514 *lp = '\0';
515
516 if (line[0] != '\0')
517 (*pfunc)(line, constant);
518 }
519
520 static void
521 macro(void)
522 {
523
524 if (msflag) {
525 do {
526 SKIP;
527 } while (C!='.' || C!='.' || C=='.'); /* look for .. */
528 if (c != '\n')
529 SKIP;
530 return;
531 }
532 SKIP;
533 inmacro = YES;
534 }
535
536 static void
537 tbl(void)
538 {
539
540 while (C != '.')
541 ; /* nothing */
542 SKIP;
543 intable = YES;
544 }
545
546 static void
547 stbl(void)
548 {
549
550 while (C != '.')
551 ; /* nothing */
552 SKIP_TO_COM;
553 if (c != 'T' || C != 'E') {
554 SKIP;
555 pc = c;
556 while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
557 pc = c;
558 }
559 }
560
561 static void
562 eqn(void)
563 {
564 int c1, c2;
565 int dflg;
566 char last;
567
568 last=0;
569 dflg = 1;
570 SKIP;
571
572 for (;;) {
573 if (C1 == '.' || c == '\'') {
574 while (C1 == ' ' || c == '\t')
575 ;
576 if (c == 'E' && C1 == 'N') {
577 SKIP;
578 if (msflag && dflg) {
579 putchar('x');
580 putchar(' ');
581 if (last) {
582 putchar(last);
583 putchar('\n');
584 }
585 }
586 return;
587 }
588 } else if (c == 'd') {
589 /* look for delim */
590 if (C1 == 'e' && C1 == 'l')
591 if (C1 == 'i' && C1 == 'm') {
592 while (C1 == ' ')
593 ; /* nothing */
594
595 if ((c1 = c) == '\n' ||
596 (c2 = C1) == '\n' ||
597 (c1 == 'o' && c2 == 'f' && C1=='f')) {
598 ldelim = NOCHAR;
599 rdelim = NOCHAR;
600 } else {
601 ldelim = c1;
602 rdelim = c2;
603 }
604 }
605 dflg = 0;
606 }
607
608 if (c != '\n')
609 while (C1 != '\n') {
610 if (chars[c] == PUNCT)
611 last = c;
612 else if (c != ' ')
613 last = 0;
614 }
615 }
616 }
617
618 /* skip over a complete backslash construction */
619 static void
620 backsl(void)
621 {
622 int bdelim;
623
624 sw:
625 switch (C) {
626 case '"':
627 SKIP;
628 return;
629
630 case 's':
631 if (C == '\\')
632 backsl();
633 else {
634 while (C >= '0' && c <= '9')
635 ; /* nothing */
636 ungetc(c, infile);
637 c = '0';
638 }
639 --lp;
640 return;
641
642 case 'f':
643 case 'n':
644 case '*':
645 if (C != '(')
646 return;
647
648 case '(':
649 if (msflag) {
650 if (C == 'e') {
651 if (C == 'm') {
652 *lp = '-';
653 return;
654 }
655 }
656 else if (c != '\n')
657 C;
658 return;
659 }
660 if (C != '\n')
661 C;
662 return;
663
664 case '$':
665 C; /* discard argument number */
666 return;
667
668 case 'b':
669 case 'x':
670 case 'v':
671 case 'h':
672 case 'w':
673 case 'o':
674 case 'l':
675 case 'L':
676 if ((bdelim = C) == '\n')
677 return;
678 while (C != '\n' && c != bdelim)
679 if (c == '\\')
680 backsl();
681 return;
682
683 case '\\':
684 if (inmacro)
685 goto sw;
686
687 default:
688 return;
689 }
690 }
691
692 static void
693 sce(void)
694 {
695 char *ap;
696 int n, i;
697 char a[10];
698
699 for (ap = a; C != '\n'; ap++) {
700 *ap = c;
701 if (ap == &a[9]) {
702 SKIP;
703 ap = a;
704 break;
705 }
706 }
707 if (ap != a)
708 n = atoi(a);
709 else
710 n = 1;
711 for (i = 0; i < n;) {
712 if (C == '.') {
713 if (C == 'c') {
714 if (C == 'e') {
715 while (C == ' ')
716 ; /* nothing */
717 if (c == '0') {
718 SKIP;
719 break;
720 } else
721 SKIP;
722 }
723 else
724 SKIP;
725 } else if (c == 'P' || C == 'P') {
726 if (c != '\n')
727 SKIP;
728 break;
729 } else if (c != '\n')
730 SKIP;
731 } else {
732 SKIP;
733 i++;
734 }
735 }
736 }
737
738 static void
739 refer(int c1)
740 {
741 int c2;
742
743 if (c1 != '\n')
744 SKIP;
745
746 for (c2 = -1;;) {
747 if (C != '.')
748 SKIP;
749 else {
750 if (C != ']')
751 SKIP;
752 else {
753 while (C != '\n')
754 c2 = c;
755 if (c2 != -1 && chars[c2] == PUNCT)
756 putchar(c2);
757 return;
758 }
759 }
760 }
761 }
762
763 static void
764 inpic(void)
765 {
766 int c1;
767 char *p1;
768
769 SKIP;
770 p1 = line;
771 c = '\n';
772 for (;;) {
773 c1 = c;
774 if (C == '.' && c1 == '\n') {
775 if (C != 'P') {
776 if (c == '\n')
777 continue;
778 else {
779 SKIP;
780 c = '\n';
781 continue;
782 }
783 }
784 if (C != 'E') {
785 if (c == '\n')
786 continue;
787 else {
788 SKIP;
789 c = '\n';
790 continue;
791 }
792 }
793 SKIP;
794 return;
795 }
796 else if (c == '\"') {
797 while (C != '\"') {
798 if (c == '\\') {
799 if (C == '\"')
800 continue;
801 ungetc(c, infile);
802 backsl();
803 } else
804 *p1++ = c;
805 }
806 *p1++ = ' ';
807 }
808 else if (c == '\n' && p1 != line) {
809 *p1 = '\0';
810 if (wordflag)
811 msputwords(NO);
812 else {
813 puts(line);
814 putchar('\n');
815 }
816 p1 = line;
817 }
818 }
819 }
820
#ifdef DEBUG
/*
 * Out-of-line versions of the input macros, used for C1 and C when
 * DEBUG is defined.  (The stray closing parentheses that kept a DEBUG
 * build from compiling have been removed.)
 */
static int
_C1(void)
{

	return C1get;
}

static int
_C(void)
{

	return Cget;
}
#endif /* DEBUG */
836
837 /*
838 * Put out a macro line, using ms and mm conventions.
839 */
840 static void
841 msputmac(char *s, int constant)
842 {
843 char *t;
844 int found;
845 int last;
846
847 last = 0;
848 found = 0;
849 if (wordflag) {
850 msputwords(YES);
851 return;
852 }
853 while (*s) {
854 while (*s == ' ' || *s == '\t')
855 putchar(*s++);
856 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
857 ; /* nothing */
858 if (*s == '\"')
859 s++;
860 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
861 chars[(unsigned char)s[1]] == LETTER) {
862 while (s < t)
863 if (*s == '\"')
864 s++;
865 else
866 putchar(*s++);
867 last = *(t-1);
868 found++;
869 } else if (found && chars[(unsigned char)s[0]] == PUNCT &&
870 s[1] == '\0') {
871 putchar(*s++);
872 } else {
873 last = *(t - 1);
874 s = t;
875 }
876 }
877 putchar('\n');
878 if (msflag && chars[last] == PUNCT) {
879 putchar(last);
880 putchar('\n');
881 }
882 }
883
884 /*
885 * put out words (for the -w option) with ms and mm conventions
886 */
887 static void
888 msputwords(int macline)
889 {
890 char *p, *p1;
891 int i, nlet;
892
893 for (p1 = line;;) {
894 /*
895 * skip initial specials ampersands and apostrophes
896 */
897 while (chars[(unsigned char)*p1] < DIGIT)
898 if (*p1++ == '\0')
899 return;
900 nlet = 0;
901 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
902 if (i == LETTER)
903 ++nlet;
904
905 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
906 /*
907 * delete trailing ampersands and apostrophes
908 */
909 while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
910 i == APOS )
911 --p;
912 while (p1 < p)
913 putchar(*p1++);
914 putchar('\n');
915 } else {
916 p1 = p;
917 }
918 }
919 }
920
921 /*
922 * put out a macro using the me conventions
923 */
/*
 * Advance the pointer variable `cp` over blanks and tabs (SKIPBLANK), or
 * up to the next blank, tab or end of string (SKIPNONBLANK).  `cp` is
 * evaluated several times, so pass a plain variable.
 */
#define SKIPBLANK(cp)	 while (*cp == ' ' || *cp == '\t') { cp++; }
#define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\t' && *cp !='\0') { cp++; }
926
927 static void
928 meputmac(char *cp, int constant)
929 {
930 char *np;
931 int found;
932 int argno;
933 int last;
934 int inquote;
935
936 last = 0;
937 found = 0;
938 if (wordflag) {
939 meputwords(YES);
940 return;
941 }
942 for (argno = 0; *cp; argno++) {
943 SKIPBLANK(cp);
944 inquote = (*cp == '"');
945 if (inquote)
946 cp++;
947 for (np = cp; *np; np++) {
948 switch (*np) {
949 case '\n':
950 case '\0':
951 break;
952
953 case '\t':
954 case ' ':
955 if (inquote)
956 continue;
957 else
958 goto endarg;
959
960 case '"':
961 if (inquote && np[1] == '"') {
962 memmove(np, np + 1, strlen(np));
963 np++;
964 continue;
965 } else {
966 *np = ' '; /* bye bye " */
967 goto endarg;
968 }
969
970 default:
971 continue;
972 }
973 }
974 endarg: ;
975 /*
976 * cp points at the first char in the arg
977 * np points one beyond the last char in the arg
978 */
979 if ((argconcat == 0) || (argconcat != argno))
980 putchar(' ');
981 #ifdef FULLDEBUG
982 {
983 char *p;
984 printf("[%d,%d: ", argno, np - cp);
985 for (p = cp; p < np; p++) {
986 putchar(*p);
987 }
988 printf("]");
989 }
990 #endif /* FULLDEBUG */
991 /*
992 * Determine if the argument merits being printed
993 *
994 * constant is the cut off point below which something
995 * is not a word.
996 */
997 if (((np - cp) > constant) &&
998 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
999 for (cp = cp; cp < np; cp++)
1000 putchar(*cp);
1001 last = np[-1];
1002 found++;
1003 } else if (found && (np - cp == 1) &&
1004 chars[(unsigned char)*cp] == PUNCT) {
1005 putchar(*cp);
1006 } else {
1007 last = np[-1];
1008 }
1009 cp = np;
1010 }
1011 if (msflag && chars[last] == PUNCT)
1012 putchar(last);
1013 putchar('\n');
1014 }
1015
/*
 * put out words (for the -w option) with me conventions;
 * currently these are the same as for ms and mm, so this simply
 * forwards to msputwords().
 */
static void
meputwords(int macline)
{

	msputwords(macline);
}
1025
1026 /*
1027 *
1028 * Skip over a nested set of macros
1029 *
1030 * Possible arguments to noblock are:
1031 *
1032 * fi end of unfilled text
1033 * PE pic ending
1034 * DE display ending
1035 *
1036 * for ms and mm only:
1037 * KE keep ending
1038 *
1039 * NE undocumented match to NS (for mm?)
1040 * LE mm only: matches RL or *L (for lists)
1041 *
1042 * for me:
1043 * ([lqbzcdf]
1044 */
1045 static void
1046 noblock(char a1, char a2)
1047 {
1048 int c1,c2;
1049 int eqnf;
1050 int lct;
1051
1052 lct = 0;
1053 eqnf = 1;
1054 SKIP;
1055 for (;;) {
1056 while (C != '.')
1057 if (c == '\n')
1058 continue;
1059 else
1060 SKIP;
1061 if ((c1 = C) == '\n')
1062 continue;
1063 if ((c2 = C) == '\n')
1064 continue;
1065 if (c1 == a1 && c2 == a2) {
1066 SKIP;
1067 if (lct != 0) {
1068 lct--;
1069 continue;
1070 }
1071 if (eqnf)
1072 putchar('.');
1073 putchar('\n');
1074 return;
1075 } else if (a1 == 'L' && c2 == 'L') {
1076 lct++;
1077 SKIP;
1078 }
1079 /*
1080 * equations (EQ) nested within a display
1081 */
1082 else if (c1 == 'E' && c2 == 'Q') {
1083 if ((mac == ME && a1 == ')')
1084 || (mac != ME && a1 == 'D')) {
1085 eqn();
1086 eqnf=0;
1087 }
1088 }
1089 /*
1090 * turning on filling is done by the paragraphing
1091 * macros
1092 */
1093 else if (a1 == 'f') { /* .fi */
1094 if ((mac == ME && (c2 == 'h' || c2 == 'p'))
1095 || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1096 SKIP;
1097 return;
1098 }
1099 } else {
1100 SKIP;
1101 }
1102 }
1103 }
1104
1105 static int
1106 /*ARGSUSED*/
1107 EQ(pacmac unused)
1108 {
1109
1110 eqn();
1111 return 0;
1112 }
1113
1114 static int
1115 /*ARGSUSED*/
1116 domacro(pacmac unused)
1117 {
1118
1119 macro();
1120 return 0;
1121 }
1122
1123 static int
1124 /*ARGSUSED*/
1125 PS(pacmac unused)
1126 {
1127
1128 for (C; c == ' ' || c == '\t'; C)
1129 ; /* nothing */
1130
1131 if (c == '<') { /* ".PS < file" -- don't expect a .PE */
1132 SKIP;
1133 return 0;
1134 }
1135 if (!msflag)
1136 inpic();
1137 else
1138 noblock('P', 'E');
1139 return 0;
1140 }
1141
1142 static int
1143 /*ARGSUSED*/
1144 skip(pacmac unused)
1145 {
1146
1147 SKIP;
1148 return 0;
1149 }
1150
1151 static int
1152 /*ARGSUSED*/
1153 intbl(pacmac unused)
1154 {
1155
1156 if (msflag)
1157 stbl();
1158 else
1159 tbl();
1160 return 0;
1161 }
1162
1163 static int
1164 /*ARGSUSED*/
1165 outtbl(pacmac unused)
1166 {
1167
1168 intable = NO;
1169 return 0;
1170 }
1171
1172 int
1173 /*ARGSUSED*/
1174 so(pacmac unused)
1175 {
1176
1177 if (!iflag) {
1178 getfname();
1179 if (fname[0]) {
1180 if (++filesp - &files[0] > MAXFILES)
1181 err(1, "too many nested files (max %d)",
1182 MAXFILES);
1183 infile = *filesp = opn(fname);
1184 }
1185 }
1186 return 0;
1187 }
1188
1189 static int
1190 /*ARGSUSED*/
1191 nx(pacmac unused)
1192 {
1193
1194 if (!iflag) {
1195 getfname();
1196 if (fname[0] == '\0')
1197 exit(0);
1198 if (infile != stdin)
1199 fclose(infile);
1200 infile = *filesp = opn(fname);
1201 }
1202 return 0;
1203 }
1204
1205 static int
1206 /*ARGSUSED*/
1207 skiptocom(pacmac unused)
1208 {
1209
1210 SKIP_TO_COM;
1211 return COMX;
1212 }
1213
1214 static int
1215 PP(pacmac c12)
1216 {
1217 int c1, c2;
1218
1219 frommac(c12, c1, c2);
1220 printf(".%c%c", c1, c2);
1221 while (C != '\n')
1222 putchar(c);
1223 putchar('\n');
1224 return 0;
1225 }
1226
1227 static int
1228 /*ARGSUSED*/
1229 AU(pacmac unused)
1230 {
1231
1232 if (mac == MM)
1233 return 0;
1234 SKIP_TO_COM;
1235 return COMX;
1236 }
1237
1238 static int
1239 SH(pacmac c12)
1240 {
1241 int c1, c2;
1242
1243 frommac(c12, c1, c2);
1244
1245 if (parag) {
1246 printf(".%c%c", c1, c2);
1247 while (C != '\n')
1248 putchar(c);
1249 putchar(c);
1250 putchar('!');
1251 for (;;) {
1252 while (C != '\n')
1253 putchar(c);
1254 putchar('\n');
1255 if (C == '.')
1256 return COM;
1257 putchar('!');
1258 putchar(c);
1259 }
1260 /*NOTREACHED*/
1261 } else {
1262 SKIP_TO_COM;
1263 return COMX;
1264 }
1265 }
1266
1267 static int
1268 /*ARGSUSED*/
1269 UX(pacmac unused)
1270 {
1271
1272 if (wordflag)
1273 printf("UNIX\n");
1274 else
1275 printf("UNIX ");
1276 return 0;
1277 }
1278
1279 static int
1280 MMHU(pacmac c12)
1281 {
1282 int c1, c2;
1283
1284 frommac(c12, c1, c2);
1285 if (parag) {
1286 printf(".%c%c", c1, c2);
1287 while (C != '\n')
1288 putchar(c);
1289 putchar('\n');
1290 } else {
1291 SKIP;
1292 }
1293 return 0;
1294 }
1295
1296 static int
1297 mesnblock(pacmac c12)
1298 {
1299 int c1, c2;
1300
1301 frommac(c12, c1, c2);
1302 noblock(')', c2);
1303 return 0;
1304 }
1305
1306 static int
1307 mssnblock(pacmac c12)
1308 {
1309 int c1, c2;
1310
1311 frommac(c12, c1, c2);
1312 noblock(c1, 'E');
1313 return 0;
1314 }
1315
1316 static int
1317 /*ARGUSED*/
1318 nf(pacmac unused)
1319 {
1320
1321 noblock('f', 'i');
1322 return 0;
1323 }
1324
1325 static int
1326 /*ARGUSED*/
1327 ce(pacmac unused)
1328 {
1329
1330 sce();
1331 return 0;
1332 }
1333
1334 static int
1335 meip(pacmac c12)
1336 {
1337
1338 if (parag)
1339 mepp(c12);
1340 else if (wordflag) /* save the tag */
1341 regline(meputmac, ONE);
1342 else
1343 SKIP;
1344 return 0;
1345 }
1346
1347 /*
1348 * only called for -me .pp or .sh, when parag is on
1349 */
1350 static int
1351 mepp(pacmac c12)
1352 {
1353
1354 PP(c12); /* eats the line */
1355 return 0;
1356 }
1357
1358 /*
1359 * Start of a section heading; output the section name if doing words
1360 */
1361 static int
1362 mesh(pacmac c12)
1363 {
1364
1365 if (parag)
1366 mepp(c12);
1367 else if (wordflag)
1368 defcomline(c12);
1369 else
1370 SKIP;
1371 return 0;
1372 }
1373
1374 /*
1375 * process a font setting
1376 */
1377 static int
1378 mefont(pacmac c12)
1379 {
1380
1381 argconcat = 1;
1382 defcomline(c12);
1383 argconcat = 0;
1384 return 0;
1385 }
1386
1387 static int
1388 manfont(pacmac c12)
1389 {
1390
1391 return mefont(c12);
1392 }
1393
1394 static int
1395 manpp(pacmac c12)
1396 {
1397
1398 return mepp(c12);
1399 }
1400
1401 static void
1402 defcomline(pacmac c12)
1403 {
1404 int c1, c2;
1405
1406 frommac(c12, c1, c2);
1407 if (msflag && mac == MM && c2 == 'L') {
1408 if (disp || c1 == 'R') {
1409 noblock('L', 'E');
1410 } else {
1411 SKIP;
1412 putchar('.');
1413 }
1414 }
1415 else if (c1 == '.' && c2 == '.') {
1416 if (msflag) {
1417 SKIP;
1418 return;
1419 }
1420 while (C == '.')
1421 /*VOID*/;
1422 }
1423 ++inmacro;
1424 /*
1425 * Process the arguments to the macro
1426 */
1427 switch (mac) {
1428 default:
1429 case MM:
1430 case MS:
1431 if (c1 <= 'Z' && msflag)
1432 regline(msputmac, ONE);
1433 else
1434 regline(msputmac, TWO);
1435 break;
1436 case ME:
1437 regline(meputmac, ONE);
1438 break;
1439 }
1440 --inmacro;
1441 }
1442
1443 static void
1444 comline(void)
1445 {
1446 int c1;
1447 int c2;
1448 pacmac c12;
1449 int mid;
1450 int lb, ub;
1451 int hit;
1452 static int tabsize = 0;
1453 static const struct mactab *mactab = NULL;
1454 const struct mactab *mp;
1455
1456 if (mactab == 0)
1457 buildtab(&mactab, &tabsize);
1458 com:
1459 while (C == ' ' || c == '\t')
1460 ;
1461 comx:
1462 if ((c1 = c) == '\n')
1463 return;
1464 c2 = C;
1465 if (c1 == '.' && c2 != '.')
1466 inmacro = NO;
1467 if (msflag && c1 == '[') {
1468 refer(c2);
1469 return;
1470 }
1471 if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1472 printf(".P\n");
1473 return;
1474 }
1475 if (c2 == '\n')
1476 return;
1477 /*
1478 * Single letter macro
1479 */
1480 if (mac == ME && (c2 == ' ' || c2 == '\t') )
1481 c2 = ' ';
1482 c12 = tomac(c1, c2);
1483 /*
1484 * binary search through the table of macros
1485 */
1486 lb = 0;
1487 ub = tabsize - 1;
1488 while (lb <= ub) {
1489 mid = (ub + lb) / 2;
1490 mp = &mactab[mid];
1491 if (mp->macname < c12)
1492 lb = mid + 1;
1493 else if (mp->macname > c12)
1494 ub = mid - 1;
1495 else {
1496 hit = 1;
1497 #ifdef FULLDEBUG
1498 printf("preliminary hit macro %c%c ", c1, c2);
1499 #endif /* FULLDEBUG */
1500 switch (mp->condition) {
1501 case NONE:
1502 hit = YES;
1503 break;
1504 case FNEST:
1505 hit = (filesp == files);
1506 break;
1507 case NOMAC:
1508 hit = !inmacro;
1509 break;
1510 case MAC:
1511 hit = inmacro;
1512 break;
1513 case PARAG:
1514 hit = parag;
1515 break;
1516 case NBLK:
1517 hit = !keepblock;
1518 break;
1519 default:
1520 hit = 0;
1521 }
1522
1523 if (hit) {
1524 #ifdef FULLDEBUG
1525 printf("MATCH\n");
1526 #endif /* FULLDEBUG */
1527 switch ((*(mp->func))(c12)) {
1528 default:
1529 return;
1530 case COMX:
1531 goto comx;
1532 case COM:
1533 goto com;
1534 }
1535 }
1536 #ifdef FULLDEBUG
1537 printf("FAIL\n");
1538 #endif /* FULLDEBUG */
1539 break;
1540 }
1541 }
1542 defcomline(c12);
1543 }
1544
1545 static int
1546 macsort(const void *p1, const void *p2)
1547 {
1548 const struct mactab *t1 = p1;
1549 const struct mactab *t2 = p2;
1550
1551 return t1->macname - t2->macname;
1552 }
1553
1554 static int
1555 sizetab(const struct mactab *mp)
1556 {
1557 int i;
1558
1559 i = 0;
1560 if (mp) {
1561 for (; mp->macname; mp++, i++)
1562 /*VOID*/ ;
1563 }
1564 return i;
1565 }
1566
1567 static struct mactab *
1568 macfill(struct mactab *dst, const struct mactab *src)
1569 {
1570
1571 if (src) {
1572 while (src->macname)
1573 *dst++ = *src++;
1574 }
1575 return dst;
1576 }
1577
/* Print the usage message on stderr and exit with status 1. */
static void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n",
	    __progname);
	exit(1);
}
1586
1587 static void
1588 buildtab(const struct mactab **r_back, int *r_size)
1589 {
1590 size_t size;
1591 const struct mactab *p1, *p2;
1592 struct mactab *back, *p;
1593
1594 size = sizetab(troffmactab) + sizetab(ppmactab);
1595 p1 = p2 = NULL;
1596 if (msflag) {
1597 switch (mac) {
1598 case ME:
1599 p1 = memactab;
1600 break;
1601 case MM:
1602 p1 = msmactab;
1603 p2 = mmmactab;
1604 break;
1605 case MS:
1606 p1 = msmactab;
1607 break;
1608 case MA:
1609 p1 = manmactab;
1610 break;
1611 default:
1612 break;
1613 }
1614 }
1615 size += sizetab(p1);
1616 size += sizetab(p2);
1617 back = calloc(size + 2, sizeof(struct mactab));
1618 if (back == NULL)
1619 err(1, NULL);
1620
1621 p = macfill(back, troffmactab);
1622 p = macfill(p, ppmactab);
1623 p = macfill(p, p1);
1624 p = macfill(p, p2);
1625
1626 qsort(back, size, sizeof(struct mactab), macsort);
1627 *r_size = size;
1628 *r_back = back;
1629 }
1630
1631 /*
1632 * troff commands
1633 */
1634 static const struct mactab troffmactab[] = {
1635 M(NONE, '\\','"', skip), /* comment */
1636 M(NOMAC, 'd','e', domacro), /* define */
1637 M(NOMAC, 'i','g', domacro), /* ignore till .. */
1638 M(NOMAC, 'a','m', domacro), /* append macro */
1639 M(NBLK, 'n','f', nf), /* filled */
1640 M(NBLK, 'c','e', ce), /* centered */
1641
1642 M(NONE, 's','o', so), /* source a file */
1643 M(NONE, 'n','x', nx), /* go to next file */
1644
1645 M(NONE, 't','m', skip), /* print string on tty */
1646 M(NONE, 'h','w', skip), /* exception hyphen words */
1647 M(NONE, 0,0, 0)
1648 };
1649
1650 /*
1651 * Preprocessor output
1652 */
1653 static const struct mactab ppmactab[] = {
1654 M(FNEST, 'E','Q', EQ), /* equation starting */
1655 M(FNEST, 'T','S', intbl), /* table starting */
1656 M(FNEST, 'T','C', intbl), /* alternative table? */
1657 M(FNEST, 'T','&', intbl), /* table reformatting */
1658 M(NONE, 'T','E', outtbl),/* table ending */
1659 M(NONE, 'P','S', PS), /* picture starting */
1660 M(NONE, 0,0, 0)
1661 };
1662
1663 /*
1664 * Particular to ms and mm
1665 */
1666 static const struct mactab msmactab[] = {
1667 M(NONE, 'T','L', skiptocom), /* title follows */
1668 M(NONE, 'F','S', skiptocom), /* start footnote */
1669 M(NONE, 'O','K', skiptocom), /* Other kws */
1670
1671 M(NONE, 'N','R', skip), /* undocumented */
1672 M(NONE, 'N','D', skip), /* use supplied date */
1673
1674 M(PARAG, 'P','P', PP), /* begin parag */
1675 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */
1676 M(PARAG, 'L','P', PP), /* left blocked parag */
1677
1678 M(NONE, 'A','U', AU), /* author */
1679 M(NONE, 'A','I', AU), /* authors institution */
1680
1681 M(NONE, 'S','H', SH), /* section heading */
1682 M(NONE, 'S','N', SH), /* undocumented */
1683 M(NONE, 'U','X', UX), /* unix */
1684
1685 M(NBLK, 'D','S', mssnblock), /* start display text */
1686 M(NBLK, 'K','S', mssnblock), /* start keep */
1687 M(NBLK, 'K','F', mssnblock), /* start float keep */
1688 M(NONE, 0,0, 0)
1689 };
1690
1691 static const struct mactab mmmactab[] = {
1692 M(NONE, 'H',' ', MMHU), /* -mm ? */
1693 M(NONE, 'H','U', MMHU), /* -mm ? */
1694 M(PARAG, 'P',' ', PP), /* paragraph for -mm */
1695 M(NBLK, 'N','S', mssnblock), /* undocumented */
1696 M(NONE, 0,0, 0)
1697 };
1698
1699 static const struct mactab memactab[] = {
1700 M(PARAG, 'p','p', mepp),
1701 M(PARAG, 'l','p', mepp),
1702 M(PARAG, 'n','p', mepp),
1703 M(NONE, 'i','p', meip),
1704
1705 M(NONE, 's','h', mesh),
1706 M(NONE, 'u','h', mesh),
1707
1708 M(NBLK, '(','l', mesnblock),
1709 M(NBLK, '(','q', mesnblock),
1710 M(NBLK, '(','b', mesnblock),
1711 M(NBLK, '(','z', mesnblock),
1712 M(NBLK, '(','c', mesnblock),
1713
1714 M(NBLK, '(','d', mesnblock),
1715 M(NBLK, '(','f', mesnblock),
1716 M(NBLK, '(','x', mesnblock),
1717
1718 M(NONE, 'r',' ', mefont),
1719 M(NONE, 'i',' ', mefont),
1720 M(NONE, 'b',' ', mefont),
1721 M(NONE, 'u',' ', mefont),
1722 M(NONE, 'q',' ', mefont),
1723 M(NONE, 'r','b', mefont),
1724 M(NONE, 'b','i', mefont),
1725 M(NONE, 'b','x', mefont),
1726 M(NONE, 0,0, 0)
1727 };
1728
1729 static const struct mactab manmactab[] = {
1730 M(PARAG, 'B','I', manfont),
1731 M(PARAG, 'B','R', manfont),
1732 M(PARAG, 'I','B', manfont),
1733 M(PARAG, 'I','R', manfont),
1734 M(PARAG, 'R','B', manfont),
1735 M(PARAG, 'R','I', manfont),
1736
1737 M(PARAG, 'P','P', manpp),
1738 M(PARAG, 'L','P', manpp),
1739 M(PARAG, 'H','P', manpp),
1740 M(NONE, 0,0, 0)
1741 };
1742