1 /* $NetBSD: deroff.c,v 1.14 2025/02/20 19:32:16 rillig Exp $ */ 2 3 /* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */ 4 5 /*- 6 * Copyright (c) 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 /* 34 * Copyright (C) Caldera International Inc. 2001-2002. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code and documentation must retain the above 41 * copyright notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed or owned by Caldera 48 * International, Inc. 49 * 4. Neither the name of Caldera International, Inc. nor the names of other 50 * contributors may be used to endorse or promote products derived from 51 * this software without specific prior written permission. 52 * 53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 64 * POSSIBILITY OF SUCH DAMAGE. 65 */ 66 67 #include <sys/cdefs.h> 68 __RCSID("$NetBSD: deroff.c,v 1.14 2025/02/20 19:32:16 rillig Exp $"); 69 70 #include <err.h> 71 #include <limits.h> 72 #include <stddef.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 #include <unistd.h> 77 78 /* 79 * Deroff command -- strip troff, eqn, and Tbl sequences from 80 * a file. Has two flags argument, -w, to cause output one word per line 81 * rather than in the original format. 82 * -mm (or -ms) causes the corresponding macro's to be interpreted 83 * so that just sentences are output 84 * -ml also gets rid of lists. 85 * Deroff follows .so and .nx commands, removes contents of macro 86 * definitions, equations (both .EQ ... .EN and $...$), 87 * Tbl command sequences, and Troff backslash constructions. 88 * 89 * All input is through the Cget macro; 90 * the most recently read character is in c. 91 * 92 * Modified by Robert Henry to process -me and -man macros. 93 */ 94 95 #define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) ) 96 #define C1get ( (c=getc(infile)) == EOF ? eof() : c) 97 98 #ifdef DEBUG 99 # define C _C() 100 # define C1 _C1() 101 #else /* not DEBUG */ 102 # define C Cget 103 # define C1 C1get 104 #endif /* not DEBUG */ 105 106 #define SKIP while (C != '\n') 107 #define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c 108 109 #define YES 1 110 #define NO 0 111 #define MS 0 /* -ms */ 112 #define MM 1 /* -mm */ 113 #define ME 2 /* -me */ 114 #define MA 3 /* -man */ 115 116 #ifdef DEBUG 117 static char *mactab[] = { "-ms", "-mm", "-me", "-ma" }; 118 #endif /* DEBUG */ 119 120 #define ONE 1 121 #define TWO 2 122 123 #define NOCHAR -2 124 #define SPECIAL 0 125 #define APOS 1 126 #define PUNCT 2 127 #define DIGIT 3 128 #define LETTER 4 129 130 #define MAXFILES 20 131 132 static int iflag; 133 static int wordflag; 134 static int msflag; /* processing a source written using a mac package */ 135 static int mac; /* which package */ 136 static int disp; 137 static int parag; 138 static int inmacro; 139 static int intable; 140 static int keepblock; /* keep blocks of text; normally false when msflag */ 141 142 static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 143 144 static char line[LINE_MAX]; 145 static char *lp; 146 147 static int c; 148 static int pc; 149 static int ldelim; 150 static int rdelim; 151 152 static char fname[PATH_MAX]; 153 static FILE *files[MAXFILES]; 154 static FILE **filesp; 155 static FILE *infile; 156 157 static int argc; 158 static char **argv; 159 160 /* 161 * Macro processing 162 * 163 * Macro table definitions 164 */ 165 typedef int pacmac; /* compressed macro name */ 166 static int argconcat = 0; /* concat arguments together (-me only) */ 167 168 #define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF)) 169 #define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF), __USE(c1), __USE(c2)) 170 171 struct mactab { 172 int condition; 173 pacmac macname; 174 int (*func)(pacmac); 175 }; 176 177 static const struct mactab troffmactab[]; 178 static const struct mactab ppmactab[]; 179 static const struct mactab msmactab[]; 180 static const struct mactab mmmactab[]; 181 static const struct mactab memactab[]; 182 static const struct mactab manmactab[]; 183 184 /* 185 * Macro table initialization 186 */ 187 #define M(cond, c1, c2, func) {cond, tomac(c1, c2), func} 188 189 /* 190 * Flags for matching conditions other than 191 * the macro name 192 */ 193 #define NONE 0 194 #define FNEST 1 /* no nested files */ 195 #define NOMAC 2 /* no macro */ 196 #define MAC 3 /* macro */ 197 #define PARAG 4 /* in a paragraph */ 198 #define MSF 5 /* msflag is on */ 199 #define NBLK 6 /* set if no blocks to be kept */ 200 201 /* 202 * Return codes from macro minions, determine where to jump, 203 * how to repeat/reprocess text 204 */ 205 #define COMX 1 /* goto comx */ 206 #define COM 2 /* goto com */ 207 208 static int skeqn(void); 209 static int eof(void); 210 #ifdef DEBUG 211 static int _C1(void); 212 static int _C(void); 213 #endif 214 static int EQ(pacmac); 215 static int domacro(pacmac); 216 static int PS(pacmac); 217 static int skip(pacmac); 218 static int intbl(pacmac); 219 static int outtbl(pacmac); 220 static int so(pacmac); 221 static int nx(pacmac); 222 static int skiptocom(pacmac); 223 static int PP(pacmac); 224 static int AU(pacmac); 225 static int SH(pacmac); 226 static int UX(pacmac); 227 static int MMHU(pacmac); 228 static int mesnblock(pacmac); 229 static int mssnblock(pacmac); 230 static int nf(pacmac); 231 static int ce(pacmac); 232 static int meip(pacmac); 233 static int mepp(pacmac); 234 static int mesh(pacmac); 235 static int mefont(pacmac); 236 static int manfont(pacmac); 237 static int manpp(pacmac); 238 static int macsort(const void *, const void *); 239 static int sizetab(const struct mactab *); 240 static void getfname(void); 241 static void textline(char *, int); 242 static void work(void) __dead; 243 static void regline(void (*)(char *, int), int); 244 static void macro(void); 245 static void tbl(void); 246 static void stbl(void); 247 static void eqn(void); 248 static void backsl(void); 249 static void sce(void); 250 static void refer(int); 251 static void inpic(void); 252 static void msputmac(char *, int); 253 static void msputwords(void); 254 static void meputmac(char *, int); 255 static void meputwords(void); 256 static void noblock(char, char); 257 static void defcomline(pacmac); 258 static void comline(void); 259 static void buildtab(const struct mactab **, int *); 260 static FILE *opn(char *); 261 static struct mactab *macfill(struct mactab *, const struct mactab *); 262 static void usage(void) __dead; 263 264 int 265 main(int ac, char **av) 266 { 267 int i, ch; 268 int errflg = 0; 269 int kflag = NO; 270 271 iflag = NO; 272 wordflag = NO; 273 msflag = NO; 274 mac = ME; 275 disp = NO; 276 parag = NO; 277 inmacro = NO; 278 intable = NO; 279 ldelim = NOCHAR; 280 rdelim = NOCHAR; 281 keepblock = YES; 282 283 while ((ch = getopt(ac, av, "ikpwm:")) != -1) { 284 switch (ch) { 285 case 'i': 286 iflag = YES; 287 break; 288 case 'k': 289 kflag = YES; 290 break; 291 case 'm': 292 msflag = YES; 293 keepblock = NO; 294 switch (optarg[0]) { 295 case 'm': 296 mac = MM; 297 break; 298 case 's': 299 mac = MS; 300 break; 301 case 'e': 302 mac = ME; 303 break; 304 case 'a': 305 mac = MA; 306 break; 307 case 'l': 308 disp = YES; 309 break; 310 default: 311 errflg++; 312 break; 313 } 314 if (errflg == 0 && optarg[1] != '\0') 315 errflg++; 316 break; 317 case 'p': 318 parag = YES; 319 break; 320 case 'w': 321 wordflag = YES; 322 kflag = YES; 323 break; 324 default: 325 errflg++; 326 } 327 } 328 argc = ac - optind; 329 argv = av + optind; 330 331 if (kflag) 332 keepblock = YES; 333 if (errflg) 334 usage(); 335 336 #ifdef DEBUG 337 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n", 338 msflag, mactab[mac], keepblock, disp); 339 #endif /* DEBUG */ 340 if (argc == 0) { 341 infile = stdin; 342 } else { 343 infile = opn(argv[0]); 344 --argc; 345 ++argv; 346 } 347 files[0] = infile; 348 filesp = &files[0]; 349 350 for (i = 'a'; i <= 'z' ; ++i) 351 chars[i] = LETTER; 352 for (i = 'A'; i <= 'Z'; ++i) 353 chars[i] = LETTER; 354 for (i = '0'; i <= '9'; ++i) 355 chars[i] = DIGIT; 356 chars['\''] = APOS; 357 chars['&'] = APOS; 358 chars['.'] = PUNCT; 359 chars[','] = PUNCT; 360 chars[';'] = PUNCT; 361 chars['?'] = PUNCT; 362 chars[':'] = PUNCT; 363 work(); 364 } 365 366 static int 367 skeqn(void) 368 { 369 370 while ((c = getc(infile)) != rdelim) { 371 if (c == EOF) 372 c = eof(); 373 else if (c == '"') { 374 while ((c = getc(infile)) != '"') { 375 if (c == EOF || 376 (c == '\\' && (c = getc(infile)) == EOF)) 377 c = eof(); 378 } 379 } 380 } 381 if (msflag) 382 return c == 'x'; 383 return c == ' '; 384 } 385 386 static FILE * 387 opn(char *p) 388 { 389 FILE *fd; 390 391 if ((fd = fopen(p, "r")) == NULL) 392 err(1, "fopen %s", p); 393 394 return fd; 395 } 396 397 static int 398 eof(void) 399 { 400 401 if (infile != stdin) 402 fclose(infile); 403 if (filesp > files) 404 infile = *--filesp; 405 else if (argc > 0) { 406 infile = opn(argv[0]); 407 --argc; 408 ++argv; 409 } else 410 exit(0); 411 return C; 412 } 413 414 static void 415 getfname(void) 416 { 417 char *p; 418 struct chain { 419 struct chain *nextp; 420 char *datap; 421 } *q; 422 static struct chain *namechain= NULL; 423 424 while (C == ' ') 425 ; /* nothing */ 426 427 for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) && 428 (*p = c) != '\n' && 429 c != ' ' && c != '\t' && c != '\\'; ++p) 430 C; 431 *p = '\0'; 432 while (c != '\n') 433 C; 434 435 /* see if this name has already been used */ 436 for (q = namechain ; q; q = q->nextp) 437 if (strcmp(fname, q->datap) == 0) { 438 fname[0] = '\0'; 439 return; 440 } 441 442 q = (struct chain *) malloc(sizeof(struct chain)); 443 if (q == NULL) 444 err(1, NULL); 445 q->nextp = namechain; 446 q->datap = strdup(fname); 447 if (q->datap == NULL) 448 err(1, NULL); 449 namechain = q; 450 } 451 452 /*ARGSUSED*/ 453 static void 454 textline(char *str, int constant) 455 { 456 457 if (wordflag) { 458 msputwords(); 459 return; 460 } 461 puts(str); 462 } 463 464 static void 465 work(void) 466 { 467 468 for (;;) { 469 C; 470 #ifdef FULLDEBUG 471 printf("Starting work with `%c'\n", c); 472 #endif /* FULLDEBUG */ 473 if (c == '.' || c == '\'') 474 comline(); 475 else 476 regline(textline, TWO); 477 } 478 } 479 480 static void 481 regline(void (*pfunc)(char *, int), int constant) 482 { 483 484 line[0] = c; 485 lp = line; 486 while (lp - line < (ptrdiff_t)sizeof(line)) { 487 if (c == '\\') { 488 *lp = ' '; 489 backsl(); 490 } 491 if (c == '\n') 492 break; 493 if (intable && c == 'T') { 494 *++lp = C; 495 if (c == '{' || c == '}') { 496 lp[-1] = ' '; 497 *lp = C; 498 } 499 } else { 500 *++lp = C; 501 } 502 } 503 *lp = '\0'; 504 505 if (line[0] != '\0') 506 (*pfunc)(line, constant); 507 } 508 509 static void 510 macro(void) 511 { 512 513 if (msflag) { 514 do { 515 SKIP; 516 } while (C!='.' || C!='.' || C=='.'); /* look for .. */ 517 if (c != '\n') 518 SKIP; 519 return; 520 } 521 SKIP; 522 inmacro = YES; 523 } 524 525 static void 526 tbl(void) 527 { 528 529 while (C != '.') 530 ; /* nothing */ 531 SKIP; 532 intable = YES; 533 } 534 535 static void 536 stbl(void) 537 { 538 539 while (C != '.') 540 ; /* nothing */ 541 SKIP_TO_COM; 542 if (c != 'T' || C != 'E') { 543 SKIP; 544 pc = c; 545 while (C != '.' || pc != '\n' || C != 'T' || C != 'E') 546 pc = c; 547 } 548 } 549 550 static void 551 eqn(void) 552 { 553 int c1, c2; 554 int dflg; 555 char last; 556 557 last=0; 558 dflg = 1; 559 SKIP; 560 561 for (;;) { 562 if (C1 == '.' || c == '\'') { 563 while (C1 == ' ' || c == '\t') 564 ; 565 if (c == 'E' && C1 == 'N') { 566 SKIP; 567 if (msflag && dflg) { 568 putchar('x'); 569 putchar(' '); 570 if (last) { 571 putchar(last); 572 putchar('\n'); 573 } 574 } 575 return; 576 } 577 } else if (c == 'd') { 578 /* look for delim */ 579 if (C1 == 'e' && C1 == 'l') 580 if (C1 == 'i' && C1 == 'm') { 581 while (C1 == ' ') 582 ; /* nothing */ 583 584 if ((c1 = c) == '\n' || 585 (c2 = C1) == '\n' || 586 (c1 == 'o' && c2 == 'f' && C1=='f')) { 587 ldelim = NOCHAR; 588 rdelim = NOCHAR; 589 } else { 590 ldelim = c1; 591 rdelim = c2; 592 } 593 } 594 dflg = 0; 595 } 596 597 if (c != '\n') 598 while (C1 != '\n') { 599 if (chars[c] == PUNCT) 600 last = c; 601 else if (c != ' ') 602 last = 0; 603 } 604 } 605 } 606 607 /* skip over a complete backslash construction */ 608 static void 609 backsl(void) 610 { 611 int bdelim; 612 613 sw: 614 switch (C) { 615 case '"': 616 SKIP; 617 return; 618 619 case 's': 620 if (C == '\\') 621 backsl(); 622 else { 623 while (C >= '0' && c <= '9') 624 ; /* nothing */ 625 ungetc(c, infile); 626 c = '0'; 627 } 628 --lp; 629 return; 630 631 case 'f': 632 case 'n': 633 case '*': 634 if (C != '(') 635 return; 636 637 /* FALLTHROUGH */ 638 case '(': 639 if (msflag) { 640 if (C == 'e') { 641 if (C == 'm') { 642 *lp = '-'; 643 return; 644 } 645 } 646 else if (c != '\n') 647 C; 648 return; 649 } 650 if (C != '\n') 651 C; 652 return; 653 654 case '$': 655 C; /* discard argument number */ 656 return; 657 658 case 'b': 659 case 'x': 660 case 'v': 661 case 'h': 662 case 'w': 663 case 'o': 664 case 'l': 665 case 'L': 666 if ((bdelim = C) == '\n') 667 return; 668 while (C != '\n' && c != bdelim) 669 if (c == '\\') 670 backsl(); 671 return; 672 673 case '\\': 674 if (inmacro) 675 goto sw; 676 return; 677 678 default: 679 return; 680 } 681 } 682 683 static void 684 sce(void) 685 { 686 char *ap; 687 int n, i; 688 char a[10]; 689 690 for (ap = a; C != '\n'; ap++) { 691 *ap = c; 692 if (ap == &a[9]) { 693 SKIP; 694 ap = a; 695 break; 696 } 697 } 698 if (ap != a) 699 n = atoi(a); 700 else 701 n = 1; 702 for (i = 0; i < n;) { 703 if (C == '.') { 704 if (C == 'c') { 705 if (C == 'e') { 706 while (C == ' ') 707 ; /* nothing */ 708 if (c == '0') { 709 SKIP; 710 break; 711 } else 712 SKIP; 713 } 714 else 715 SKIP; 716 } else if (c == 'P' || C == 'P') { 717 if (c != '\n') 718 SKIP; 719 break; 720 } else if (c != '\n') 721 SKIP; 722 } else { 723 SKIP; 724 i++; 725 } 726 } 727 } 728 729 static void 730 refer(int c1) 731 { 732 int c2; 733 734 if (c1 != '\n') 735 SKIP; 736 737 for (c2 = -1;;) { 738 if (C != '.') 739 SKIP; 740 else { 741 if (C != ']') 742 SKIP; 743 else { 744 while (C != '\n') 745 c2 = c; 746 if (c2 != -1 && chars[c2] == PUNCT) 747 putchar(c2); 748 return; 749 } 750 } 751 } 752 } 753 754 static void 755 inpic(void) 756 { 757 int c1; 758 char *p1; 759 760 SKIP; 761 p1 = line; 762 c = '\n'; 763 for (;;) { 764 c1 = c; 765 if (C == '.' && c1 == '\n') { 766 if (C != 'P') { 767 if (c == '\n') 768 continue; 769 else { 770 SKIP; 771 c = '\n'; 772 continue; 773 } 774 } 775 if (C != 'E') { 776 if (c == '\n') 777 continue; 778 else { 779 SKIP; 780 c = '\n'; 781 continue; 782 } 783 } 784 SKIP; 785 return; 786 } 787 else if (c == '\"') { 788 while (C != '\"') { 789 if (c == '\\') { 790 if (C == '\"') 791 continue; 792 ungetc(c, infile); 793 backsl(); 794 } else 795 *p1++ = c; 796 } 797 *p1++ = ' '; 798 } 799 else if (c == '\n' && p1 != line) { 800 *p1 = '\0'; 801 if (wordflag) 802 msputwords(); 803 else { 804 puts(line); 805 putchar('\n'); 806 } 807 p1 = line; 808 } 809 } 810 } 811 812 #ifdef DEBUG 813 static int 814 _C1(void) 815 { 816 817 return C1get; 818 } 819 820 static int 821 _C(void) 822 { 823 824 return Cget; 825 } 826 #endif /* DEBUG */ 827 828 /* 829 * Put out a macro line, using ms and mm conventions. 830 */ 831 static void 832 msputmac(char *s, int constant) 833 { 834 char *t; 835 int found; 836 int last; 837 838 last = 0; 839 found = 0; 840 if (wordflag) { 841 msputwords(); 842 return; 843 } 844 while (*s) { 845 while (*s == ' ' || *s == '\t') 846 putchar(*s++); 847 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t) 848 ; /* nothing */ 849 if (*s == '\"') 850 s++; 851 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER && 852 chars[(unsigned char)s[1]] == LETTER) { 853 while (s < t) 854 if (*s == '\"') 855 s++; 856 else 857 putchar(*s++); 858 last = *(t-1); 859 found++; 860 } else if (found && chars[(unsigned char)s[0]] == PUNCT && 861 s[1] == '\0') { 862 putchar(*s++); 863 } else { 864 last = *(t - 1); 865 s = t; 866 } 867 } 868 putchar('\n'); 869 if (msflag && chars[last] == PUNCT) { 870 putchar(last); 871 putchar('\n'); 872 } 873 } 874 875 /* 876 * put out words (for the -w option) with ms and mm conventions 877 */ 878 static void 879 msputwords(void) 880 { 881 char *p, *p1; 882 int i, nlet; 883 884 for (p1 = line;;) { 885 /* 886 * skip initial specials ampersands and apostrophes 887 */ 888 while (chars[(unsigned char)*p1] < DIGIT) 889 if (*p1++ == '\0') 890 return; 891 nlet = 0; 892 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p) 893 if (i == LETTER) 894 ++nlet; 895 896 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) { 897 /* 898 * delete trailing ampersands and apostrophes 899 */ 900 while ((i = chars[(unsigned char)p[-1]]) == PUNCT || 901 i == APOS ) 902 --p; 903 while (p1 < p) 904 putchar(*p1++); 905 putchar('\n'); 906 } else { 907 p1 = p; 908 } 909 } 910 } 911 912 /* 913 * put out a macro using the me conventions 914 */ 915 #define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; } 916 917 static void 918 meputmac(char *cp, int constant) 919 { 920 char *np; 921 int found; 922 int argno; 923 int last; 924 int inquote; 925 926 last = 0; 927 found = 0; 928 if (wordflag) { 929 meputwords(); 930 return; 931 } 932 for (argno = 0; *cp; argno++) { 933 SKIPBLANK(cp); 934 inquote = (*cp == '"'); 935 if (inquote) 936 cp++; 937 for (np = cp; *np; np++) { 938 switch (*np) { 939 case '\n': 940 case '\0': 941 break; 942 943 case '\t': 944 case ' ': 945 if (inquote) 946 continue; 947 else 948 goto endarg; 949 950 case '"': 951 if (inquote && np[1] == '"') { 952 memmove(np, np + 1, strlen(np)); 953 np++; 954 continue; 955 } else { 956 *np = ' '; /* bye bye " */ 957 goto endarg; 958 } 959 960 default: 961 continue; 962 } 963 } 964 endarg: ; 965 /* 966 * cp points at the first char in the arg 967 * np points one beyond the last char in the arg 968 */ 969 if ((argconcat == 0) || (argconcat != argno)) 970 putchar(' '); 971 #ifdef FULLDEBUG 972 { 973 char *p; 974 printf("[%d,%d: ", argno, np - cp); 975 for (p = cp; p < np; p++) { 976 putchar(*p); 977 } 978 printf("]"); 979 } 980 #endif /* FULLDEBUG */ 981 /* 982 * Determine if the argument merits being printed 983 * 984 * constant is the cut off point below which something 985 * is not a word. 986 */ 987 if (((np - cp) > constant) && 988 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) { 989 for (; cp < np; cp++) 990 putchar(*cp); 991 last = np[-1]; 992 found++; 993 } else if (found && (np - cp == 1) && 994 chars[(unsigned char)*cp] == PUNCT) { 995 putchar(*cp); 996 } else { 997 last = np[-1]; 998 } 999 cp = np; 1000 } 1001 if (msflag && chars[last] == PUNCT) 1002 putchar(last); 1003 putchar('\n'); 1004 } 1005 1006 /* 1007 * put out words (for the -w option) with ms and mm conventions 1008 */ 1009 static void 1010 meputwords(void) 1011 { 1012 1013 msputwords(); 1014 } 1015 1016 /* 1017 * 1018 * Skip over a nested set of macros 1019 * 1020 * Possible arguments to noblock are: 1021 * 1022 * fi end of unfilled text 1023 * PE pic ending 1024 * DE display ending 1025 * 1026 * for ms and mm only: 1027 * KE keep ending 1028 * 1029 * NE undocumented match to NS (for mm?) 1030 * LE mm only: matches RL or *L (for lists) 1031 * 1032 * for me: 1033 * ([lqbzcdf] 1034 */ 1035 static void 1036 noblock(char a1, char a2) 1037 { 1038 int c1,c2; 1039 int eqnf; 1040 int lct; 1041 1042 lct = 0; 1043 eqnf = 1; 1044 SKIP; 1045 for (;;) { 1046 while (C != '.') 1047 if (c == '\n') 1048 continue; 1049 else 1050 SKIP; 1051 if ((c1 = C) == '\n') 1052 continue; 1053 if ((c2 = C) == '\n') 1054 continue; 1055 if (c1 == a1 && c2 == a2) { 1056 SKIP; 1057 if (lct != 0) { 1058 lct--; 1059 continue; 1060 } 1061 if (eqnf) 1062 putchar('.'); 1063 putchar('\n'); 1064 return; 1065 } else if (a1 == 'L' && c2 == 'L') { 1066 lct++; 1067 SKIP; 1068 } 1069 /* 1070 * equations (EQ) nested within a display 1071 */ 1072 else if (c1 == 'E' && c2 == 'Q') { 1073 if ((mac == ME && a1 == ')') 1074 || (mac != ME && a1 == 'D')) { 1075 eqn(); 1076 eqnf=0; 1077 } 1078 } 1079 /* 1080 * turning on filling is done by the paragraphing 1081 * macros 1082 */ 1083 else if (a1 == 'f') { /* .fi */ 1084 if ((mac == ME && (c2 == 'h' || c2 == 'p')) 1085 || (mac != ME && (c1 == 'P' || c2 == 'P'))) { 1086 SKIP; 1087 return; 1088 } 1089 } else { 1090 SKIP; 1091 } 1092 } 1093 } 1094 1095 static int 1096 /*ARGSUSED*/ 1097 EQ(pacmac unused) 1098 { 1099 1100 eqn(); 1101 return 0; 1102 } 1103 1104 static int 1105 /*ARGSUSED*/ 1106 domacro(pacmac unused) 1107 { 1108 1109 macro(); 1110 return 0; 1111 } 1112 1113 static int 1114 /*ARGSUSED*/ 1115 PS(pacmac unused) 1116 { 1117 1118 for (C; c == ' ' || c == '\t'; C) 1119 ; /* nothing */ 1120 1121 if (c == '<') { /* ".PS < file" -- don't expect a .PE */ 1122 SKIP; 1123 return 0; 1124 } 1125 if (!msflag) 1126 inpic(); 1127 else 1128 noblock('P', 'E'); 1129 return 0; 1130 } 1131 1132 static int 1133 /*ARGSUSED*/ 1134 skip(pacmac unused) 1135 { 1136 1137 SKIP; 1138 return 0; 1139 } 1140 1141 static int 1142 /*ARGSUSED*/ 1143 intbl(pacmac unused) 1144 { 1145 1146 if (msflag) 1147 stbl(); 1148 else 1149 tbl(); 1150 return 0; 1151 } 1152 1153 static int 1154 /*ARGSUSED*/ 1155 outtbl(pacmac unused) 1156 { 1157 1158 intable = NO; 1159 return 0; 1160 } 1161 1162 static int 1163 /*ARGSUSED*/ 1164 so(pacmac unused) 1165 { 1166 1167 if (!iflag) { 1168 getfname(); 1169 if (fname[0]) { 1170 if (++filesp - &files[0] > MAXFILES) 1171 err(1, "too many nested files (max %d)", 1172 MAXFILES); 1173 infile = *filesp = opn(fname); 1174 } 1175 } 1176 return 0; 1177 } 1178 1179 static int 1180 /*ARGSUSED*/ 1181 nx(pacmac unused) 1182 { 1183 1184 if (!iflag) { 1185 getfname(); 1186 if (fname[0] == '\0') 1187 exit(0); 1188 if (infile != stdin) 1189 fclose(infile); 1190 infile = *filesp = opn(fname); 1191 } 1192 return 0; 1193 } 1194 1195 static int 1196 /*ARGSUSED*/ 1197 skiptocom(pacmac unused) 1198 { 1199 1200 SKIP_TO_COM; 1201 return COMX; 1202 } 1203 1204 static int 1205 PP(pacmac c12) 1206 { 1207 int c1, c2; 1208 1209 frommac(c12, c1, c2); 1210 printf(".%c%c", c1, c2); 1211 while (C != '\n') 1212 putchar(c); 1213 putchar('\n'); 1214 return 0; 1215 } 1216 1217 static int 1218 /*ARGSUSED*/ 1219 AU(pacmac unused) 1220 { 1221 1222 if (mac == MM) 1223 return 0; 1224 SKIP_TO_COM; 1225 return COMX; 1226 } 1227 1228 static int 1229 SH(pacmac c12) 1230 { 1231 int c1, c2; 1232 1233 frommac(c12, c1, c2); 1234 1235 if (parag) { 1236 printf(".%c%c", c1, c2); 1237 while (C != '\n') 1238 putchar(c); 1239 putchar(c); 1240 putchar('!'); 1241 for (;;) { 1242 while (C != '\n') 1243 putchar(c); 1244 putchar('\n'); 1245 if (C == '.') 1246 return COM; 1247 putchar('!'); 1248 putchar(c); 1249 } 1250 /*NOTREACHED*/ 1251 } else { 1252 SKIP_TO_COM; 1253 return COMX; 1254 } 1255 } 1256 1257 static int 1258 /*ARGSUSED*/ 1259 UX(pacmac unused) 1260 { 1261 1262 if (wordflag) 1263 printf("UNIX\n"); 1264 else 1265 printf("UNIX "); 1266 return 0; 1267 } 1268 1269 static int 1270 MMHU(pacmac c12) 1271 { 1272 int c1, c2; 1273 1274 frommac(c12, c1, c2); 1275 if (parag) { 1276 printf(".%c%c", c1, c2); 1277 while (C != '\n') 1278 putchar(c); 1279 putchar('\n'); 1280 } else { 1281 SKIP; 1282 } 1283 return 0; 1284 } 1285 1286 static int 1287 mesnblock(pacmac c12) 1288 { 1289 int c1, c2; 1290 1291 frommac(c12, c1, c2); 1292 noblock(')', c2); 1293 return 0; 1294 } 1295 1296 static int 1297 mssnblock(pacmac c12) 1298 { 1299 int c1, c2; 1300 1301 frommac(c12, c1, c2); 1302 noblock(c1, 'E'); 1303 return 0; 1304 } 1305 1306 static int 1307 /*ARGSUSED*/ 1308 nf(pacmac unused) 1309 { 1310 1311 noblock('f', 'i'); 1312 return 0; 1313 } 1314 1315 static int 1316 /*ARGSUSED*/ 1317 ce(pacmac unused) 1318 { 1319 1320 sce(); 1321 return 0; 1322 } 1323 1324 static int 1325 meip(pacmac c12) 1326 { 1327 1328 if (parag) 1329 mepp(c12); 1330 else if (wordflag) /* save the tag */ 1331 regline(meputmac, ONE); 1332 else 1333 SKIP; 1334 return 0; 1335 } 1336 1337 /* 1338 * only called for -me .pp or .sh, when parag is on 1339 */ 1340 static int 1341 mepp(pacmac c12) 1342 { 1343 1344 PP(c12); /* eats the line */ 1345 return 0; 1346 } 1347 1348 /* 1349 * Start of a section heading; output the section name if doing words 1350 */ 1351 static int 1352 mesh(pacmac c12) 1353 { 1354 1355 if (parag) 1356 mepp(c12); 1357 else if (wordflag) 1358 defcomline(c12); 1359 else 1360 SKIP; 1361 return 0; 1362 } 1363 1364 /* 1365 * process a font setting 1366 */ 1367 static int 1368 mefont(pacmac c12) 1369 { 1370 1371 argconcat = 1; 1372 defcomline(c12); 1373 argconcat = 0; 1374 return 0; 1375 } 1376 1377 static int 1378 manfont(pacmac c12) 1379 { 1380 1381 return mefont(c12); 1382 } 1383 1384 static int 1385 manpp(pacmac c12) 1386 { 1387 1388 return mepp(c12); 1389 } 1390 1391 static void 1392 defcomline(pacmac c12) 1393 { 1394 int c1, c2; 1395 1396 frommac(c12, c1, c2); 1397 if (msflag && mac == MM && c2 == 'L') { 1398 if (disp || c1 == 'R') { 1399 noblock('L', 'E'); 1400 } else { 1401 SKIP; 1402 putchar('.'); 1403 } 1404 } 1405 else if (c1 == '.' && c2 == '.') { 1406 if (msflag) { 1407 SKIP; 1408 return; 1409 } 1410 while (C == '.') 1411 /*VOID*/; 1412 } 1413 ++inmacro; 1414 /* 1415 * Process the arguments to the macro 1416 */ 1417 switch (mac) { 1418 default: 1419 case MM: 1420 case MS: 1421 if (c1 <= 'Z' && msflag) 1422 regline(msputmac, ONE); 1423 else 1424 regline(msputmac, TWO); 1425 break; 1426 case ME: 1427 regline(meputmac, ONE); 1428 break; 1429 } 1430 --inmacro; 1431 } 1432 1433 static void 1434 comline(void) 1435 { 1436 int c1; 1437 int c2; 1438 pacmac c12; 1439 int mid; 1440 int lb, ub; 1441 int hit; 1442 static int tabsize = 0; 1443 static const struct mactab *mactab = NULL; 1444 const struct mactab *mp; 1445 1446 if (mactab == 0) 1447 buildtab(&mactab, &tabsize); 1448 com: 1449 while (C == ' ' || c == '\t') 1450 ; 1451 comx: 1452 if ((c1 = c) == '\n') 1453 return; 1454 c2 = C; 1455 if (c1 == '.' && c2 != '.') 1456 inmacro = NO; 1457 if (msflag && c1 == '[') { 1458 refer(c2); 1459 return; 1460 } 1461 if (parag && mac==MM && c1 == 'P' && c2 == '\n') { 1462 printf(".P\n"); 1463 return; 1464 } 1465 if (c2 == '\n') 1466 return; 1467 /* 1468 * Single letter macro 1469 */ 1470 if (mac == ME && (c2 == ' ' || c2 == '\t') ) 1471 c2 = ' '; 1472 c12 = tomac(c1, c2); 1473 /* 1474 * binary search through the table of macros 1475 */ 1476 lb = 0; 1477 ub = tabsize - 1; 1478 while (lb <= ub) { 1479 mid = (ub + lb) / 2; 1480 mp = &mactab[mid]; 1481 if (mp->macname < c12) 1482 lb = mid + 1; 1483 else if (mp->macname > c12) 1484 ub = mid - 1; 1485 else { 1486 hit = 1; 1487 #ifdef FULLDEBUG 1488 printf("preliminary hit macro %c%c ", c1, c2); 1489 #endif /* FULLDEBUG */ 1490 switch (mp->condition) { 1491 case NONE: 1492 hit = YES; 1493 break; 1494 case FNEST: 1495 hit = (filesp == files); 1496 break; 1497 case NOMAC: 1498 hit = !inmacro; 1499 break; 1500 case MAC: 1501 hit = inmacro; 1502 break; 1503 case PARAG: 1504 hit = parag; 1505 break; 1506 case NBLK: 1507 hit = !keepblock; 1508 break; 1509 default: 1510 hit = 0; 1511 } 1512 1513 if (hit) { 1514 #ifdef FULLDEBUG 1515 printf("MATCH\n"); 1516 #endif /* FULLDEBUG */ 1517 switch ((*(mp->func))(c12)) { 1518 default: 1519 return; 1520 case COMX: 1521 goto comx; 1522 case COM: 1523 goto com; 1524 } 1525 } 1526 #ifdef FULLDEBUG 1527 printf("FAIL\n"); 1528 #endif /* FULLDEBUG */ 1529 break; 1530 } 1531 } 1532 defcomline(c12); 1533 } 1534 1535 static int 1536 macsort(const void *p1, const void *p2) 1537 { 1538 const struct mactab *t1 = p1; 1539 const struct mactab *t2 = p2; 1540 1541 return t1->macname - t2->macname; 1542 } 1543 1544 static int 1545 sizetab(const struct mactab *mp) 1546 { 1547 int i; 1548 1549 i = 0; 1550 if (mp) { 1551 for (; mp->macname; mp++, i++) 1552 /*VOID*/ ; 1553 } 1554 return i; 1555 } 1556 1557 static struct mactab * 1558 macfill(struct mactab *dst, const struct mactab *src) 1559 { 1560 1561 if (src) { 1562 while (src->macname) 1563 *dst++ = *src++; 1564 } 1565 return dst; 1566 } 1567 1568 static void 1569 usage(void) 1570 { 1571 extern char *__progname; 1572 1573 fprintf(stderr, "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n", __progname); 1574 exit(1); 1575 } 1576 1577 static void 1578 buildtab(const struct mactab **r_back, int *r_size) 1579 { 1580 size_t size; 1581 const struct mactab *p1, *p2; 1582 struct mactab *back, *p; 1583 1584 size = sizetab(troffmactab) + sizetab(ppmactab); 1585 p1 = p2 = NULL; 1586 if (msflag) { 1587 switch (mac) { 1588 case ME: 1589 p1 = memactab; 1590 break; 1591 case MM: 1592 p1 = msmactab; 1593 p2 = mmmactab; 1594 break; 1595 case MS: 1596 p1 = msmactab; 1597 break; 1598 case MA: 1599 p1 = manmactab; 1600 break; 1601 default: 1602 break; 1603 } 1604 } 1605 size += sizetab(p1); 1606 size += sizetab(p2); 1607 back = calloc(size + 2, sizeof(struct mactab)); 1608 if (back == NULL) 1609 err(1, NULL); 1610 1611 p = macfill(back, troffmactab); 1612 p = macfill(p, ppmactab); 1613 p = macfill(p, p1); 1614 p = macfill(p, p2); 1615 1616 qsort(back, size, sizeof(struct mactab), macsort); 1617 *r_size = size; 1618 *r_back = back; 1619 } 1620 1621 /* 1622 * troff commands 1623 */ 1624 static const struct mactab troffmactab[] = { 1625 M(NONE, '\\','"', skip), /* comment */ 1626 M(NOMAC, 'd','e', domacro), /* define */ 1627 M(NOMAC, 'i','g', domacro), /* ignore till .. */ 1628 M(NOMAC, 'a','m', domacro), /* append macro */ 1629 M(NBLK, 'n','f', nf), /* filled */ 1630 M(NBLK, 'c','e', ce), /* centered */ 1631 1632 M(NONE, 's','o', so), /* source a file */ 1633 M(NONE, 'n','x', nx), /* go to next file */ 1634 1635 M(NONE, 't','m', skip), /* print string on tty */ 1636 M(NONE, 'h','w', skip), /* exception hyphen words */ 1637 M(NONE, 0,0, 0) 1638 }; 1639 1640 /* 1641 * Preprocessor output 1642 */ 1643 static const struct mactab ppmactab[] = { 1644 M(FNEST, 'E','Q', EQ), /* equation starting */ 1645 M(FNEST, 'T','S', intbl), /* table starting */ 1646 M(FNEST, 'T','C', intbl), /* alternative table? */ 1647 M(FNEST, 'T','&', intbl), /* table reformatting */ 1648 M(NONE, 'T','E', outtbl),/* table ending */ 1649 M(NONE, 'P','S', PS), /* picture starting */ 1650 M(NONE, 0,0, 0) 1651 }; 1652 1653 /* 1654 * Particular to ms and mm 1655 */ 1656 static const struct mactab msmactab[] = { 1657 M(NONE, 'T','L', skiptocom), /* title follows */ 1658 M(NONE, 'F','S', skiptocom), /* start footnote */ 1659 M(NONE, 'O','K', skiptocom), /* Other kws */ 1660 1661 M(NONE, 'N','R', skip), /* undocumented */ 1662 M(NONE, 'N','D', skip), /* use supplied date */ 1663 1664 M(PARAG, 'P','P', PP), /* begin parag */ 1665 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */ 1666 M(PARAG, 'L','P', PP), /* left blocked parag */ 1667 1668 M(NONE, 'A','U', AU), /* author */ 1669 M(NONE, 'A','I', AU), /* authors institution */ 1670 1671 M(NONE, 'S','H', SH), /* section heading */ 1672 M(NONE, 'S','N', SH), /* undocumented */ 1673 M(NONE, 'U','X', UX), /* unix */ 1674 1675 M(NBLK, 'D','S', mssnblock), /* start display text */ 1676 M(NBLK, 'K','S', mssnblock), /* start keep */ 1677 M(NBLK, 'K','F', mssnblock), /* start float keep */ 1678 M(NONE, 0,0, 0) 1679 }; 1680 1681 static const struct mactab mmmactab[] = { 1682 M(NONE, 'H',' ', MMHU), /* -mm ? */ 1683 M(NONE, 'H','U', MMHU), /* -mm ? */ 1684 M(PARAG, 'P',' ', PP), /* paragraph for -mm */ 1685 M(NBLK, 'N','S', mssnblock), /* undocumented */ 1686 M(NONE, 0,0, 0) 1687 }; 1688 1689 static const struct mactab memactab[] = { 1690 M(PARAG, 'p','p', mepp), 1691 M(PARAG, 'l','p', mepp), 1692 M(PARAG, 'n','p', mepp), 1693 M(NONE, 'i','p', meip), 1694 1695 M(NONE, 's','h', mesh), 1696 M(NONE, 'u','h', mesh), 1697 1698 M(NBLK, '(','l', mesnblock), 1699 M(NBLK, '(','q', mesnblock), 1700 M(NBLK, '(','b', mesnblock), 1701 M(NBLK, '(','z', mesnblock), 1702 M(NBLK, '(','c', mesnblock), 1703 1704 M(NBLK, '(','d', mesnblock), 1705 M(NBLK, '(','f', mesnblock), 1706 M(NBLK, '(','x', mesnblock), 1707 1708 M(NONE, 'r',' ', mefont), 1709 M(NONE, 'i',' ', mefont), 1710 M(NONE, 'b',' ', mefont), 1711 M(NONE, 'u',' ', mefont), 1712 M(NONE, 'q',' ', mefont), 1713 M(NONE, 'r','b', mefont), 1714 M(NONE, 'b','i', mefont), 1715 M(NONE, 'b','x', mefont), 1716 M(NONE, 0,0, 0) 1717 }; 1718 1719 static const struct mactab manmactab[] = { 1720 M(PARAG, 'B','I', manfont), 1721 M(PARAG, 'B','R', manfont), 1722 M(PARAG, 'I','B', manfont), 1723 M(PARAG, 'I','R', manfont), 1724 M(PARAG, 'R','B', manfont), 1725 M(PARAG, 'R','I', manfont), 1726 1727 M(PARAG, 'P','P', manpp), 1728 M(PARAG, 'L','P', manpp), 1729 M(PARAG, 'H','P', manpp), 1730 M(NONE, 0,0, 0) 1731 }; 1732